diff --git a/kernel/config-5.4.x-aarch64-rt b/kernel/config-5.11.x-aarch64-rt similarity index 100% rename from kernel/config-5.4.x-aarch64-rt rename to kernel/config-5.11.x-aarch64-rt diff --git a/kernel/config-5.4.x-x86_64-rt b/kernel/config-5.11.x-x86_64-rt similarity index 100% rename from kernel/config-5.4.x-x86_64-rt rename to kernel/config-5.11.x-x86_64-rt diff --git a/kernel/patches-5.11.x-rt/0001-highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch b/kernel/patches-5.11.x-rt/0001-highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch new file mode 100644 index 000000000..ae076a298 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0001-highmem-Don-t-disable-preemption-on-RT-in-kmap_atomi.patch @@ -0,0 +1,77 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 30 Oct 2020 13:59:06 +0100 +Subject: [PATCH] highmem: Don't disable preemption on RT in kmap_atomic() + +Disabling preemption makes it impossible to acquire sleeping locks within +kmap_atomic() section. +For PREEMPT_RT it is sufficient to disable migration. + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/highmem-internal.h | 27 ++++++++++++++++++++++----- + 1 file changed, 22 insertions(+), 5 deletions(-) + +--- a/include/linux/highmem-internal.h ++++ b/include/linux/highmem-internal.h +@@ -90,7 +90,11 @@ static inline void __kunmap_local(void * + + static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot) + { +- preempt_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); ++ + pagefault_disable(); + return __kmap_local_page_prot(page, prot); + } +@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct p + + static inline void *kmap_atomic_pfn(unsigned long pfn) + { +- preempt_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); ++ + pagefault_disable(); + return __kmap_local_pfn_prot(pfn, kmap_prot); + } +@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void + { + kunmap_local_indexed(addr); + pagefault_enable(); +- preempt_enable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_enable(); ++ else ++ preempt_enable(); + } + + unsigned int __nr_free_highpages(void); +@@ -184,7 +195,10 @@ static inline void __kunmap_local(void * + + static inline void *kmap_atomic(struct page *page) + { +- preempt_disable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_disable(); ++ else ++ preempt_disable(); + pagefault_disable(); + return page_address(page); + } +@@ -205,7 +219,10 @@ static inline void __kunmap_atomic(void + kunmap_flush_on_unmap(addr); + #endif + pagefault_enable(); +- preempt_enable(); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ migrate_enable(); ++ else ++ preempt_enable(); + } + + static inline unsigned int nr_free_highpages(void) { return 0; } diff --git a/kernel/patches-5.11.x-rt/0002-timers-Move-clearing-of-base-timer_running-under-bas.patch b/kernel/patches-5.11.x-rt/0002-timers-Move-clearing-of-base-timer_running-under-bas.patch new file mode 100644 index 000000000..9faa964b5 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0002-timers-Move-clearing-of-base-timer_running-under-bas.patch @@ -0,0 +1,55 @@ +From: Thomas Gleixner +Date: Sun, 6 Dec 2020 22:40:07 +0100 +Subject: [PATCH] timers: Move clearing of base::timer_running under base::lock + +syzbot reported KCSAN data races vs. timer_base::timer_running being set to +NULL without holding base::lock in expire_timers(). 
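Reduced to a userspace pthread analogue (illustrative only; the struct and helper names here are invented, not the kernel's), the reported shape is a "currently running" marker cleared outside the lock, and the fix is moving that store under it:

#include <pthread.h>
#include <stddef.h>

struct timer_base {
	pthread_mutex_t lock;
	void *running_timer;	/* timer currently expiring, or NULL */
};

static void expire_one(struct timer_base *base, void *timer,
		       void (*fn)(void *))
{
	pthread_mutex_lock(&base->lock);
	base->running_timer = timer;
	pthread_mutex_unlock(&base->lock);

	fn(timer);	/* the callback runs without the lock held */

	/* Racy variant: doing base->running_timer = NULL; here, before
	 * retaking the lock, races with a del_timer_sync()-style waiter
	 * that inspects the marker under the lock. */
	pthread_mutex_lock(&base->lock);
	base->running_timer = NULL;	/* fixed: store made under the lock */
	pthread_mutex_unlock(&base->lock);
}
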
+ +This looks innocent and most reads are clearly not problematic but for a +non-RT kernel it's completely irrelevant whether the store happens before +or after taking the lock. For an RT kernel moving the store under the lock +requires an extra unlock/lock pair in the case that there is a waiter for +the timer. But that's not the end of the world and definitely not worth the +trouble of adding boatloads of comments and annotations to the code. Famous +last words... + +Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com +Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com +Link: https://lkml.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Cc: stable-rt@vger.kernel.org +--- + kernel/time/timer.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1263,8 +1263,10 @@ static inline void timer_base_unlock_exp + static void timer_sync_wait_running(struct timer_base *base) + { + if (atomic_read(&base->timer_waiters)) { ++ raw_spin_unlock_irq(&base->lock); + spin_unlock(&base->expiry_lock); + spin_lock(&base->expiry_lock); ++ raw_spin_lock_irq(&base->lock); + } + } + +@@ -1455,14 +1457,14 @@ static void expire_timers(struct timer_b + if (timer->flags & TIMER_IRQSAFE) { + raw_spin_unlock(&base->lock); + call_timer_fn(timer, fn, baseclk); +- base->running_timer = NULL; + raw_spin_lock(&base->lock); ++ base->running_timer = NULL; + } else { + raw_spin_unlock_irq(&base->lock); + call_timer_fn(timer, fn, baseclk); ++ raw_spin_lock_irq(&base->lock); + base->running_timer = NULL; + timer_sync_wait_running(base); +- raw_spin_lock_irq(&base->lock); + } + } + } diff --git a/kernel/patches-5.11.x-rt/0003-0001-mm-zswap-add-a-flag-to-indicate-if-zpool-can-do-slee.patch b/kernel/patches-5.11.x-rt/0003-0001-mm-zswap-add-a-flag-to-indicate-if-zpool-can-do-slee.patch new file mode 100644 index 000000000..649f059e7 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0003-0001-mm-zswap-add-a-flag-to-indicate-if-zpool-can-do-slee.patch @@ -0,0 +1,245 @@ +From: Tian Tao +Date: Sat, 13 Feb 2021 20:58:30 +1300 +Subject: [PATCH 1/2] mm/zswap: add a flag to indicate if zpool can do sleep + map + +Patch series "Fix the compatibility of zsmalloc and zswap". + +The compatibility of zsmalloc and zswap was broken by commit 1ec3b5fe6eec +("mm/zswap: move to use crypto_acomp API for hardware acceleration"). + +Patch #1 adds a flag to zpool, then zswap used it to determine if zpool +drivers such as zbud/z3fold/zsmalloc will enter an atomic context after +mapping. + +The difference between zbud/z3fold and zsmalloc is that zsmalloc requires +an atomic context that since its map function holds a preempt-disabled +lock, but zbud/z3fold don't require an atomic context. So patch #2 sets +flag sleep_mapped to true indicating that zbud/z3fold can sleep after +mapping. zsmalloc didn't support sleep after mapping, so don't set that +flag to true. + +This patch (of 2): + +Add a flag to zpool, named as "sleep_mapped", and have it set true for +zbud/z3fold, not set this flag for zsmalloc, so its default value is +false. Then zswap could go the current path if the flag is true; and if +it's false, copy data from src to a temporary buffer, then unmap the +handle, take the mutex, process the buffer instead of src to avoid +sleeping function called from atomic context. 
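A condensed sketch of that flow (simplified from the zswap hunks below; zpool_can_sleep_mapped(), zpool_map_handle() and zpool_unmap_handle() are the real APIs touched by this series, while decompress() merely stands in for the crypto_acomp mutex/decompress sequence and is not an actual kernel function):

static int load_compressed(struct zpool *pool, unsigned long handle,
			   u8 *dst, unsigned int len)
{
	u8 *src, *tmp = NULL;

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(len, GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
	}

	src = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	if (tmp) {
		memcpy(tmp, src, len);		/* copy out of the atomic mapping */
		zpool_unmap_handle(pool, handle); /* ...and drop it immediately */
		src = tmp;
	}

	decompress(src, dst, len);	/* may take the mutex / sleep safely now */

	if (tmp)
		kfree(tmp);
	else
		zpool_unmap_handle(pool, handle);
	return 0;
}
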
+ +[natechancellor@gmail.com: add return value in zswap_frontswap_load] + Link: https://lkml.kernel.org/r/20210121214804.926843-1-natechancellor@gmail.com +[tiantao6@hisilicon.com: fix potential memory leak] + Link: https://lkml.kernel.org/r/1611538365-51811-1-git-send-email-tiantao6@hisilicon.com +[colin.king@canonical.com: fix potential uninitialized pointer read on tmp] + Link: https://lkml.kernel.org/r/20210128141728.639030-1-colin.king@canonical.com +[tiantao6@hisilicon.com: fix variable 'entry' is uninitialized when used] + Link: https://lkml.kernel.org/r/1611223030-58346-1-git-send-email-tiantao6@hisilicon.com + Link: https://lkml.kernel.org/r/1611035683-12732-1-git-send-email-tiantao6@hisilicon.com + Link: https://lkml.kernel.org/r/1611035683-12732-2-git-send-email-tiantao6@hisilicon.com +[song.bao.hua@hisilicon.com: Rewrote changelog] +Fixes: 1ec3b5fe6e ("mm/zswap: move to use crypto_acomp API for hardware acceleration") +Signed-off-by: Tian Tao +Signed-off-by: Nathan Chancellor +Signed-off-by: Colin Ian King +Reviewed-by: Vitaly Wool +Acked-by: Sebastian Andrzej Siewior +Reported-by: Mike Galbraith +Cc: Dan Streetman +Cc: Seth Jennings +Cc: Dan Carpenter +Signed-off-by: Andrew Morton +Signed-off-by: Barry Song +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/zpool.h | 3 ++ + mm/zpool.c | 13 ++++++++++++ + mm/zswap.c | 51 +++++++++++++++++++++++++++++++++++++++++++++----- + 3 files changed, 62 insertions(+), 5 deletions(-) + +--- a/include/linux/zpool.h ++++ b/include/linux/zpool.h +@@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *p + * @malloc: allocate mem from a pool. + * @free: free mem from a pool. + * @shrink: shrink the pool. ++ * @sleep_mapped: whether zpool driver can sleep during map. + * @map: map a handle. + * @unmap: unmap a handle. + * @total_size: get total size of a pool. +@@ -100,6 +101,7 @@ struct zpool_driver { + int (*shrink)(void *pool, unsigned int pages, + unsigned int *reclaimed); + ++ bool sleep_mapped; + void *(*map)(void *pool, unsigned long handle, + enum zpool_mapmode mm); + void (*unmap)(void *pool, unsigned long handle); +@@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_ + int zpool_unregister_driver(struct zpool_driver *driver); + + bool zpool_evictable(struct zpool *pool); ++bool zpool_can_sleep_mapped(struct zpool *pool); + + #endif +--- a/mm/zpool.c ++++ b/mm/zpool.c +@@ -23,6 +23,7 @@ struct zpool { + void *pool; + const struct zpool_ops *ops; + bool evictable; ++ bool can_sleep_mapped; + + struct list_head list; + }; +@@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const ch + zpool->pool = driver->create(name, gfp, ops, zpool); + zpool->ops = ops; + zpool->evictable = driver->shrink && ops && ops->evict; ++ zpool->can_sleep_mapped = driver->sleep_mapped; + + if (!zpool->pool) { + pr_err("couldn't create %s pool\n", type); +@@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool + return zpool->evictable; + } + ++/** ++ * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped. ++ * @zpool: The zpool to test ++ * ++ * Returns: true if zpool can sleep; false otherwise. 
++ */ ++bool zpool_can_sleep_mapped(struct zpool *zpool) ++{ ++ return zpool->can_sleep_mapped; ++} ++ + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Dan Streetman "); + MODULE_DESCRIPTION("Common API for compressed memory storage"); +--- a/mm/zswap.c ++++ b/mm/zswap.c +@@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct + struct scatterlist input, output; + struct crypto_acomp_ctx *acomp_ctx; + +- u8 *src; ++ u8 *src, *tmp = NULL; + unsigned int dlen; + int ret; + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + }; + ++ if (!zpool_can_sleep_mapped(pool)) { ++ tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC); ++ if (!tmp) ++ return -ENOMEM; ++ } ++ + /* extract swpentry from data */ + zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); + swpentry = zhdr->swpentry; /* here */ +@@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct + /* entry was invalidated */ + spin_unlock(&tree->lock); + zpool_unmap_handle(pool, handle); ++ kfree(tmp); + return 0; + } + spin_unlock(&tree->lock); +@@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct + dlen = PAGE_SIZE; + src = (u8 *)zhdr + sizeof(struct zswap_header); + ++ if (!zpool_can_sleep_mapped(pool)) { ++ ++ memcpy(tmp, src, entry->length); ++ src = tmp; ++ ++ zpool_unmap_handle(pool, handle); ++ } ++ + mutex_lock(acomp_ctx->mutex); + sg_init_one(&input, src, entry->length); + sg_init_table(&output, 1); +@@ -1033,7 +1048,11 @@ static int zswap_writeback_entry(struct + spin_unlock(&tree->lock); + + end: +- zpool_unmap_handle(pool, handle); ++ if (zpool_can_sleep_mapped(pool)) ++ zpool_unmap_handle(pool, handle); ++ else ++ kfree(tmp); ++ + return ret; + } + +@@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned + struct zswap_entry *entry; + struct scatterlist input, output; + struct crypto_acomp_ctx *acomp_ctx; +- u8 *src, *dst; ++ u8 *src, *dst, *tmp; + unsigned int dlen; + int ret; + +@@ -1253,15 +1272,33 @@ static int zswap_frontswap_load(unsigned + dst = kmap_atomic(page); + zswap_fill_page(dst, entry->value); + kunmap_atomic(dst); ++ ret = 0; + goto freeentry; + } + ++ if (!zpool_can_sleep_mapped(entry->pool->zpool)) { ++ ++ tmp = kmalloc(entry->length, GFP_ATOMIC); ++ if (!tmp) { ++ ret = -ENOMEM; ++ goto freeentry; ++ } ++ } ++ + /* decompress */ + dlen = PAGE_SIZE; + src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); + if (zpool_evictable(entry->pool->zpool)) + src += sizeof(struct zswap_header); + ++ if (!zpool_can_sleep_mapped(entry->pool->zpool)) { ++ ++ memcpy(tmp, src, entry->length); ++ src = tmp; ++ ++ zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ } ++ + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); + mutex_lock(acomp_ctx->mutex); + sg_init_one(&input, src, entry->length); +@@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned + ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); + mutex_unlock(acomp_ctx->mutex); + +- zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ if (zpool_can_sleep_mapped(entry->pool->zpool)) ++ zpool_unmap_handle(entry->pool->zpool, entry->handle); ++ else ++ kfree(tmp); ++ + BUG_ON(ret); + + freeentry: +@@ -1279,7 +1320,7 @@ static int zswap_frontswap_load(unsigned + zswap_entry_put(tree, entry); + spin_unlock(&tree->lock); + +- return 0; ++ return ret; + } + + /* frees an entry in zswap */ diff --git a/kernel/patches-5.11.x-rt/0004-0002-mm-set-the-sleep_mapped-to-true-for-zbud-and-z3fold.patch b/kernel/patches-5.11.x-rt/0004-0002-mm-set-the-sleep_mapped-to-true-for-zbud-and-z3fold.patch new file 
mode 100644 index 000000000..854759720 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0004-0002-mm-set-the-sleep_mapped-to-true-for-zbud-and-z3fold.patch @@ -0,0 +1,45 @@ +From: Tian Tao +Date: Sat, 13 Feb 2021 20:58:31 +1300 +Subject: [PATCH 2/2] mm: set the sleep_mapped to true for zbud and z3fold + +zpool driver adds a flag to indicate whether the zpool driver can enter +an atomic context after mapping. This patch sets it true for z3fold and +zbud. + +Link: https://lkml.kernel.org/r/1611035683-12732-3-git-send-email-tiantao6@hisilicon.com +[song.bao.hua@hisilicon.com: Rewrote changelog] +Fixes: 1ec3b5fe6e ("mm/zswap: move to use crypto_acomp API for hardware acceleration") +Signed-off-by: Tian Tao +Reviewed-by: Vitaly Wool +Acked-by: Sebastian Andrzej Siewior +Reported-by: Mike Galbraith +Cc: Seth Jennings +Cc: Dan Streetman +Signed-off-by: Andrew Morton +Signed-off-by: Barry Song +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/z3fold.c | 1 + + mm/zbud.c | 1 + + 2 files changed, 2 insertions(+) + +--- a/mm/z3fold.c ++++ b/mm/z3fold.c +@@ -1778,6 +1778,7 @@ static u64 z3fold_zpool_total_size(void + + static struct zpool_driver z3fold_zpool_driver = { + .type = "z3fold", ++ .sleep_mapped = true, + .owner = THIS_MODULE, + .create = z3fold_zpool_create, + .destroy = z3fold_zpool_destroy, +--- a/mm/zbud.c ++++ b/mm/zbud.c +@@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *p + + static struct zpool_driver zbud_zpool_driver = { + .type = "zbud", ++ .sleep_mapped = true, + .owner = THIS_MODULE, + .create = zbud_zpool_create, + .destroy = zbud_zpool_destroy, diff --git a/kernel/patches-5.11.x-rt/0005-blk-mq-Always-complete-remote-completions-requests-i.patch b/kernel/patches-5.11.x-rt/0005-blk-mq-Always-complete-remote-completions-requests-i.patch new file mode 100644 index 000000000..ded9e4620 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0005-blk-mq-Always-complete-remote-completions-requests-i.patch @@ -0,0 +1,42 @@ +From: Sebastian Andrzej Siewior +Date: Sat, 23 Jan 2021 21:10:26 +0100 +Subject: [PATCH] blk-mq: Always complete remote completions requests in + softirq + +Controllers with multiple queues have their IRQ-handelers pinned to a +CPU. The core shouldn't need to complete the request on a remote CPU. + +Remove this case and always raise the softirq to complete the request. + +Reviewed-by: Christoph Hellwig +Reviewed-by: Daniel Wagner +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Jens Axboe +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 14 +------------- + 1 file changed, 1 insertion(+), 13 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -628,19 +628,7 @@ static void __blk_mq_complete_request_re + { + struct request *rq = data; + +- /* +- * For most of single queue controllers, there is only one irq vector +- * for handling I/O completion, and the only irq's affinity is set +- * to all possible CPUs. On most of ARCHs, this affinity means the irq +- * is handled on one specific CPU. +- * +- * So complete I/O requests in softirq context in case of single queue +- * devices to avoid degrading I/O performance due to irqsoff latency. 
+- */ +- if (rq->q->nr_hw_queues == 1) +- blk_mq_trigger_softirq(rq); +- else +- rq->q->mq_ops->complete(rq); ++ blk_mq_trigger_softirq(rq); + } + + static inline bool blk_mq_complete_need_ipi(struct request *rq) diff --git a/kernel/patches-5.11.x-rt/0006-blk-mq-Use-llist_head-for-blk_cpu_done.patch b/kernel/patches-5.11.x-rt/0006-blk-mq-Use-llist_head-for-blk_cpu_done.patch new file mode 100644 index 000000000..ab914c851 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0006-blk-mq-Use-llist_head-for-blk_cpu_done.patch @@ -0,0 +1,188 @@ +From: Sebastian Andrzej Siewior +Date: Sat, 23 Jan 2021 21:10:27 +0100 +Subject: [PATCH] blk-mq: Use llist_head for blk_cpu_done + +With llist_head it is possible to avoid the locking (the irq-off region) +when items are added. This makes it possible to add items on a remote +CPU without additional locking. +llist_add() returns true if the list was previously empty. This can be +used to invoke the SMP function call / raise sofirq only if the first +item was added (otherwise it is already pending). +This simplifies the code a little and reduces the IRQ-off regions. + +blk_mq_raise_softirq() needs a preempt-disable section to ensure the +request is enqueued on the same CPU as the softirq is raised. +Some callers (USB-storage) invoke this path in preemptible context. + +Reviewed-by: Christoph Hellwig +Reviewed-by: Daniel Wagner +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Jens Axboe +Signed-off-by: Sebastian Andrzej Siewior +--- + block/blk-mq.c | 97 ++++++++++++++++++++----------------------------- + include/linux/blkdev.h | 2 - + 2 files changed, 42 insertions(+), 57 deletions(-) + +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -41,7 +41,7 @@ + #include "blk-mq-sched.h" + #include "blk-rq-qos.h" + +-static DEFINE_PER_CPU(struct list_head, blk_cpu_done); ++static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); + + static void blk_mq_poll_stats_start(struct request_queue *q); + static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); +@@ -567,68 +567,29 @@ void blk_mq_end_request(struct request * + } + EXPORT_SYMBOL(blk_mq_end_request); + +-/* +- * Softirq action handler - move entries to local list and loop over them +- * while passing them to the queue registered handler. +- */ +-static __latent_entropy void blk_done_softirq(struct softirq_action *h) ++static void blk_complete_reqs(struct llist_head *list) + { +- struct list_head *cpu_list, local_list; +- +- local_irq_disable(); +- cpu_list = this_cpu_ptr(&blk_cpu_done); +- list_replace_init(cpu_list, &local_list); +- local_irq_enable(); +- +- while (!list_empty(&local_list)) { +- struct request *rq; ++ struct llist_node *entry = llist_reverse_order(llist_del_all(list)); ++ struct request *rq, *next; + +- rq = list_entry(local_list.next, struct request, ipi_list); +- list_del_init(&rq->ipi_list); ++ llist_for_each_entry_safe(rq, next, entry, ipi_list) + rq->q->mq_ops->complete(rq); +- } + } + +-static void blk_mq_trigger_softirq(struct request *rq) ++static __latent_entropy void blk_done_softirq(struct softirq_action *h) + { +- struct list_head *list; +- unsigned long flags; +- +- local_irq_save(flags); +- list = this_cpu_ptr(&blk_cpu_done); +- list_add_tail(&rq->ipi_list, list); +- +- /* +- * If the list only contains our just added request, signal a raise of +- * the softirq. If there are already entries there, someone already +- * raised the irq but it hasn't run yet. 
+- */ +- if (list->next == &rq->ipi_list) +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_restore(flags); ++ blk_complete_reqs(this_cpu_ptr(&blk_cpu_done)); + } + + static int blk_softirq_cpu_dead(unsigned int cpu) + { +- /* +- * If a CPU goes away, splice its entries to the current CPU +- * and trigger a run of the softirq +- */ +- local_irq_disable(); +- list_splice_init(&per_cpu(blk_cpu_done, cpu), +- this_cpu_ptr(&blk_cpu_done)); +- raise_softirq_irqoff(BLOCK_SOFTIRQ); +- local_irq_enable(); +- ++ blk_complete_reqs(&per_cpu(blk_cpu_done, cpu)); + return 0; + } + +- + static void __blk_mq_complete_request_remote(void *data) + { +- struct request *rq = data; +- +- blk_mq_trigger_softirq(rq); ++ __raise_softirq_irqoff(BLOCK_SOFTIRQ); + } + + static inline bool blk_mq_complete_need_ipi(struct request *rq) +@@ -657,6 +618,30 @@ static inline bool blk_mq_complete_need_ + return cpu_online(rq->mq_ctx->cpu); + } + ++static void blk_mq_complete_send_ipi(struct request *rq) ++{ ++ struct llist_head *list; ++ unsigned int cpu; ++ ++ cpu = rq->mq_ctx->cpu; ++ list = &per_cpu(blk_cpu_done, cpu); ++ if (llist_add(&rq->ipi_list, list)) { ++ INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); ++ smp_call_function_single_async(cpu, &rq->csd); ++ } ++} ++ ++static void blk_mq_raise_softirq(struct request *rq) ++{ ++ struct llist_head *list; ++ ++ preempt_disable(); ++ list = this_cpu_ptr(&blk_cpu_done); ++ if (llist_add(&rq->ipi_list, list)) ++ raise_softirq(BLOCK_SOFTIRQ); ++ preempt_enable(); ++} ++ + bool blk_mq_complete_request_remote(struct request *rq) + { + WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); +@@ -669,15 +654,15 @@ bool blk_mq_complete_request_remote(stru + return false; + + if (blk_mq_complete_need_ipi(rq)) { +- INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq); +- smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd); +- } else { +- if (rq->q->nr_hw_queues > 1) +- return false; +- blk_mq_trigger_softirq(rq); ++ blk_mq_complete_send_ipi(rq); ++ return true; + } + +- return true; ++ if (rq->q->nr_hw_queues == 1) { ++ blk_mq_raise_softirq(rq); ++ return true; ++ } ++ return false; + } + EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote); + +@@ -3892,7 +3877,7 @@ static int __init blk_mq_init(void) + int i; + + for_each_possible_cpu(i) +- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); ++ init_llist_head(&per_cpu(blk_cpu_done, i)); + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); + + cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD, +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -153,7 +153,7 @@ struct request { + */ + union { + struct hlist_node hash; /* merge hash */ +- struct list_head ipi_list; ++ struct llist_node ipi_list; + }; + + /* diff --git a/kernel/patches-5.11.x-rt/0007-0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch b/kernel/patches-5.11.x-rt/0007-0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch new file mode 100644 index 000000000..6d45e5677 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0007-0001-kthread-Move-prio-affinite-change-into-the-newly-cre.patch @@ -0,0 +1,79 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 9 Nov 2020 21:30:41 +0100 +Subject: [PATCH 1/2] kthread: Move prio/affinite change into the newly created + thread + +With enabled threaded interrupts the nouveau driver reported the +following: +| Chain exists of: +| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem +| +| Possible unsafe locking scenario: +| +| CPU0 CPU1 +| ---- ---- +| lock(&cpuset_rwsem); +| lock(&device->mutex); +| 
lock(&cpuset_rwsem); +| lock(&mm->mmap_lock#2); + +The device->mutex is nvkm_device::mutex. + +Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing +to do. +Move the priority reset to the start of the newly created thread. + +Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") +Reported-by: Mike Galbraith +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de +--- + kernel/kthread.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -243,6 +243,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); + + static int kthread(void *_create) + { ++ static const struct sched_param param = { .sched_priority = 0 }; + /* Copy data: it's on kthread's stack */ + struct kthread_create_info *create = _create; + int (*threadfn)(void *data) = create->threadfn; +@@ -273,6 +274,13 @@ static int kthread(void *_create) + init_completion(&self->parked); + current->vfork_done = &self->exited; + ++ /* ++ * The new thread inherited kthreadd's priority and CPU mask. Reset ++ * back to default in case they have been changed. ++ */ ++ sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); ++ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); ++ + /* OK, tell user we're spawned, wait for stop or wakeup */ + __set_current_state(TASK_UNINTERRUPTIBLE); + create->result = current; +@@ -370,7 +378,6 @@ struct task_struct *__kthread_create_on_ + } + task = create->result; + if (!IS_ERR(task)) { +- static const struct sched_param param = { .sched_priority = 0 }; + char name[TASK_COMM_LEN]; + + /* +@@ -379,13 +386,6 @@ struct task_struct *__kthread_create_on_ + */ + vsnprintf(name, sizeof(name), namefmt, args); + set_task_comm(task, name); +- /* +- * root may have changed our (kthreadd's) priority or CPU mask. +- * The kernel thread should not inherit these properties. +- */ +- sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); +- set_cpus_allowed_ptr(task, +- housekeeping_cpumask(HK_FLAG_KTHREAD)); + } + kfree(create); + return task; diff --git a/kernel/patches-5.11.x-rt/0008-0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch b/kernel/patches-5.11.x-rt/0008-0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch new file mode 100644 index 000000000..90d6e528a --- /dev/null +++ b/kernel/patches-5.11.x-rt/0008-0002-genirq-Move-prio-assignment-into-the-newly-created-t.patch @@ -0,0 +1,55 @@ +From: Thomas Gleixner +Date: Mon, 9 Nov 2020 23:32:39 +0100 +Subject: [PATCH 2/2] genirq: Move prio assignment into the newly created + thread + +With enabled threaded interrupts the nouveau driver reported the +following: +| Chain exists of: +| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem +| +| Possible unsafe locking scenario: +| +| CPU0 CPU1 +| ---- ---- +| lock(&cpuset_rwsem); +| lock(&device->mutex); +| lock(&cpuset_rwsem); +| lock(&mm->mmap_lock#2); + +The device->mutex is nvkm_device::mutex. + +Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing +to do. +Move the priority assignment to the start of the newly created thread. 
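As a userspace analogue (illustrative; the priority value and function names are made up), the same deadlock-avoidance idea is the thread applying its own scheduling parameters on entry, instead of the creator doing it from a context that may already hold other locks:

#include <pthread.h>
#include <sched.h>

static void *irq_thread_fn(void *data)
{
	struct sched_param param = { .sched_priority = 50 };

	/* First action inside the new thread: nothing else is locked in
	 * this context, so no cross-lock chain through the creator (the
	 * cpuset_rwsem -> mmap_lock chain above) can form. */
	pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);

	/* ... handler loop ... */
	return data;
}
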
+ +Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()") +Reported-by: Mike Galbraith +Signed-off-by: Thomas Gleixner +[bigeasy: Patch description] +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de +--- + kernel/irq/manage.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -1225,6 +1225,8 @@ static int irq_thread(void *data) + irqreturn_t (*handler_fn)(struct irq_desc *desc, + struct irqaction *action); + ++ sched_set_fifo(current); ++ + if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, + &action->thread_flags)) + handler_fn = irq_forced_thread_fn; +@@ -1390,8 +1392,6 @@ setup_irq_thread(struct irqaction *new, + if (IS_ERR(t)) + return PTR_ERR(t); + +- sched_set_fifo(t); +- + /* + * We keep the reference to the task struct even if + * the thread dies to avoid that the interrupt code diff --git a/kernel/patches-5.11.x-rt/0009-notifier-Make-atomic_notifiers-use-raw_spinlock.patch b/kernel/patches-5.11.x-rt/0009-notifier-Make-atomic_notifiers-use-raw_spinlock.patch new file mode 100644 index 000000000..3ef97ac3f --- /dev/null +++ b/kernel/patches-5.11.x-rt/0009-notifier-Make-atomic_notifiers-use-raw_spinlock.patch @@ -0,0 +1,123 @@ +From: Valentin Schneider +Date: Sun, 22 Nov 2020 20:19:04 +0000 +Subject: [PATCH] notifier: Make atomic_notifiers use raw_spinlock + +Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno +leads to the idle task blocking on an RT sleeping spinlock down some +notifier path: + + [ 1.809101] BUG: scheduling while atomic: swapper/5/0/0x00000002 + [ 1.809116] Modules linked in: + [ 1.809123] Preemption disabled at: + [ 1.809125] secondary_start_kernel (arch/arm64/kernel/smp.c:227) + [ 1.809146] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.10.0-rc3-rt7 #168 + [ 1.809153] Hardware name: ARM Juno development board (r0) (DT) + [ 1.809158] Call trace: + [ 1.809160] dump_backtrace (arch/arm64/kernel/stacktrace.c:100 (discriminator 1)) + [ 1.809170] show_stack (arch/arm64/kernel/stacktrace.c:198) + [ 1.809178] dump_stack (lib/dump_stack.c:122) + [ 1.809188] __schedule_bug (kernel/sched/core.c:4886) + [ 1.809197] __schedule (./arch/arm64/include/asm/preempt.h:18 kernel/sched/core.c:4913 kernel/sched/core.c:5040) + [ 1.809204] preempt_schedule_lock (kernel/sched/core.c:5365 (discriminator 1)) + [ 1.809210] rt_spin_lock_slowlock_locked (kernel/locking/rtmutex.c:1072) + [ 1.809217] rt_spin_lock_slowlock (kernel/locking/rtmutex.c:1110) + [ 1.809224] rt_spin_lock (./include/linux/rcupdate.h:647 kernel/locking/rtmutex.c:1139) + [ 1.809231] atomic_notifier_call_chain_robust (kernel/notifier.c:71 kernel/notifier.c:118 kernel/notifier.c:186) + [ 1.809240] cpu_pm_enter (kernel/cpu_pm.c:39 kernel/cpu_pm.c:93) + [ 1.809249] psci_enter_idle_state (drivers/cpuidle/cpuidle-psci.c:52 drivers/cpuidle/cpuidle-psci.c:129) + [ 1.809258] cpuidle_enter_state (drivers/cpuidle/cpuidle.c:238) + [ 1.809267] cpuidle_enter (drivers/cpuidle/cpuidle.c:353) + [ 1.809275] do_idle (kernel/sched/idle.c:132 kernel/sched/idle.c:213 kernel/sched/idle.c:273) + [ 1.809282] cpu_startup_entry (kernel/sched/idle.c:368 (discriminator 1)) + [ 1.809288] secondary_start_kernel (arch/arm64/kernel/smp.c:273) + +Two points worth noting: + +1) That this is conceptually the same issue as pointed out in: + 313c8c16ee62 ("PM / CPU: replace raw_notifier with atomic_notifier") +2) Only the _robust() variant of 
atomic_notifier callchains suffer from + this + +AFAICT only the cpu_pm_notifier_chain really needs to be changed, but +singling it out would mean introducing a new (truly) non-blocking API. At +the same time, callers that are fine with any blocking within the call +chain should use blocking notifiers, so patching up all atomic_notifier's +doesn't seem *too* crazy to me. + +Fixes: 70d932985757 ("notifier: Fix broken error handling pattern") +Signed-off-by: Valentin Schneider +Reviewed-by: Daniel Bristot de Oliveira +Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/notifier.h | 6 +++--- + kernel/notifier.c | 12 ++++++------ + 2 files changed, 9 insertions(+), 9 deletions(-) + +--- a/include/linux/notifier.h ++++ b/include/linux/notifier.h +@@ -58,7 +58,7 @@ struct notifier_block { + }; + + struct atomic_notifier_head { +- spinlock_t lock; ++ raw_spinlock_t lock; + struct notifier_block __rcu *head; + }; + +@@ -78,7 +78,7 @@ struct srcu_notifier_head { + }; + + #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ +- spin_lock_init(&(name)->lock); \ ++ raw_spin_lock_init(&(name)->lock); \ + (name)->head = NULL; \ + } while (0) + #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ +@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(stru + cleanup_srcu_struct(&(name)->srcu); + + #define ATOMIC_NOTIFIER_INIT(name) { \ +- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .head = NULL } + #define BLOCKING_NOTIFIER_INIT(name) { \ + .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ +--- a/kernel/notifier.c ++++ b/kernel/notifier.c +@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struc + unsigned long flags; + int ret; + +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_register(&nh->head, n); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + return ret; + } + EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); +@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(str + unsigned long flags; + int ret; + +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_chain_unregister(&nh->head, n); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + synchronize_rcu(); + return ret; + } +@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(st + * Musn't use RCU; because then the notifier list can + * change between the up and down traversal. + */ +- spin_lock_irqsave(&nh->lock, flags); ++ raw_spin_lock_irqsave(&nh->lock, flags); + ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); +- spin_unlock_irqrestore(&nh->lock, flags); ++ raw_spin_unlock_irqrestore(&nh->lock, flags); + + return ret; + } diff --git a/kernel/patches-5.11.x-rt/0010-0001-rcu-Make-RCU_BOOST-default-on-CONFIG_PREEMPT_RT.patch b/kernel/patches-5.11.x-rt/0010-0001-rcu-Make-RCU_BOOST-default-on-CONFIG_PREEMPT_RT.patch new file mode 100644 index 000000000..c1bfcb03e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0010-0001-rcu-Make-RCU_BOOST-default-on-CONFIG_PREEMPT_RT.patch @@ -0,0 +1,34 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 15 Dec 2020 15:16:45 +0100 +Subject: [PATCH 1/5] rcu: Make RCU_BOOST default on CONFIG_PREEMPT_RT + +On PREEMPT_RT kernels, RCU callbacks are deferred to the `rcuc' kthread. 
+This can stall RCU grace periods due to lengthy preemption not only of RCU +readers but also of 'rcuc' kthreads, either of which prevent grace periods +from completing, which can in turn result in OOM. Because PREEMPT_RT +kernels have more kthreads that can block grace periods, it is more +important for such kernels to enable RCU_BOOST. + +This commit therefore makes RCU_BOOST the default on PREEMPT_RT. +RCU_BOOST can still be manually disabled if need be. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/rcu/Kconfig | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/rcu/Kconfig ++++ b/kernel/rcu/Kconfig +@@ -188,8 +188,8 @@ config RCU_FAST_NO_HZ + + config RCU_BOOST + bool "Enable RCU priority boosting" +- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT +- default n ++ depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT ++ default y if PREEMPT_RT + help + This option boosts the priority of preempted RCU readers that + block the current preemptible RCU grace period for too long. diff --git a/kernel/patches-5.11.x-rt/0011-0002-rcu-Unconditionally-use-rcuc-threads-on-PREEMPT_RT.patch b/kernel/patches-5.11.x-rt/0011-0002-rcu-Unconditionally-use-rcuc-threads-on-PREEMPT_RT.patch new file mode 100644 index 000000000..39fc562b0 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0011-0002-rcu-Unconditionally-use-rcuc-threads-on-PREEMPT_RT.patch @@ -0,0 +1,57 @@ +From: Scott Wood +Date: Tue, 15 Dec 2020 15:16:46 +0100 +Subject: [PATCH 2/5] rcu: Unconditionally use rcuc threads on PREEMPT_RT + +PREEMPT_RT systems have long used the rcutree.use_softirq kernel +boot parameter to avoid use of RCU_SOFTIRQ handlers, which can disrupt +real-time applications by invoking callbacks during return from interrupts +that arrived while executing time-critical code. This kernel boot +parameter instead runs RCU core processing in an 'rcuc' kthread, thus +allowing the scheduler to do its job of avoiding disrupting time-critical +code. + +This commit therefore disables the rcutree.use_softirq kernel boot +parameter on PREEMPT_RT systems, thus forcing such systems to do RCU +core processing in 'rcuc' kthreads. This approach has long been in +use by users of the -rt patchset, and there have been no complaints. +There is therefore no way for the system administrator to override this +choice, at least without modifying and rebuilding the kernel. + +Signed-off-by: Scott Wood +[bigeasy: Reword commit message] +Signed-off-by: Sebastian Andrzej Siewior +[ paulmck: Update kernel-parameters.txt accordingly. ] +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/admin-guide/kernel-parameters.txt | 4 ++++ + kernel/rcu/tree.c | 4 +++- + 2 files changed, 7 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4092,6 +4092,10 @@ + value, meaning that RCU_SOFTIRQ is used by default. + Specify rcutree.use_softirq=0 to use rcuc kthreads. + ++ But note that CONFIG_PREEMPT_RT=y kernels disable ++ this kernel boot parameter, forcibly setting it ++ to zero. ++ + rcutree.rcu_fanout_exact= [KNL] + Disable autobalancing of the rcu_node combining + tree. 
This is used by rcutorture, and might +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -100,8 +100,10 @@ static struct rcu_state rcu_state = { + static bool dump_tree; + module_param(dump_tree, bool, 0444); + /* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */ +-static bool use_softirq = true; ++static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT); ++#ifndef CONFIG_PREEMPT_RT + module_param(use_softirq, bool, 0444); ++#endif + /* Control rcu_node-tree auto-balancing at boot time. */ + static bool rcu_fanout_exact; + module_param(rcu_fanout_exact, bool, 0444); diff --git a/kernel/patches-5.11.x-rt/0012-0003-rcu-Enable-rcu_normal_after_boot-unconditionally-for.patch b/kernel/patches-5.11.x-rt/0012-0003-rcu-Enable-rcu_normal_after_boot-unconditionally-for.patch new file mode 100644 index 000000000..9adfeab89 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0012-0003-rcu-Enable-rcu_normal_after_boot-unconditionally-for.patch @@ -0,0 +1,62 @@ +From: Julia Cartwright +Date: Tue, 15 Dec 2020 15:16:47 +0100 +Subject: [PATCH 3/5] rcu: Enable rcu_normal_after_boot unconditionally for RT + +Expedited RCU grace periods send IPIs to all non-idle CPUs, and thus can +disrupt time-critical code in real-time applications. However, there +is a portion of boot-time processing (presumably before any real-time +applications have started) where expedited RCU grace periods are the only +option. And so it is that experience with the -rt patchset indicates that +PREEMPT_RT systems should always set the rcupdate.rcu_normal_after_boot +kernel boot parameter. + +This commit therefore makes the post-boot application environment safe +for real-time applications by making PREEMPT_RT systems disable the +rcupdate.rcu_normal_after_boot kernel boot parameter and acting as +if this parameter had been set. This means that post-boot calls to +synchronize_rcu_expedited() will be treated as if they were instead +calls to synchronize_rcu(), thus preventing the IPIs, and thus avoiding +disrupting real-time applications. + +Suggested-by: Luiz Capitulino +Acked-by: Paul E. McKenney +Signed-off-by: Julia Cartwright +Signed-off-by: Sebastian Andrzej Siewior +[ paulmck: Update kernel-parameters.txt accordingly. ] +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ + kernel/rcu/update.c | 4 +++- + 2 files changed, 10 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4474,6 +4474,13 @@ + only normal grace-period primitives. No effect + on CONFIG_TINY_RCU kernels. + ++ But note that CONFIG_PREEMPT_RT=y kernels enables ++ this kernel boot parameter, forcibly setting ++ it to the value one, that is, converting any ++ post-boot attempt at an expedited RCU grace ++ period to instead use normal non-expedited ++ grace-period processing. 
++ + rcupdate.rcu_task_ipi_delay= [KNL] + Set time in jiffies during which RCU tasks will + avoid sending IPIs, starting with the beginning +--- a/kernel/rcu/update.c ++++ b/kernel/rcu/update.c +@@ -56,8 +56,10 @@ + #ifndef CONFIG_TINY_RCU + module_param(rcu_expedited, int, 0); + module_param(rcu_normal, int, 0); +-static int rcu_normal_after_boot; ++static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT); ++#ifndef CONFIG_PREEMPT_RT + module_param(rcu_normal_after_boot, int, 0); ++#endif + #endif /* #ifndef CONFIG_TINY_RCU */ + + #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/patches-5.11.x-rt/0013-0004-doc-Update-RCU-s-requirements-page-about-the-PREEMPT.patch b/kernel/patches-5.11.x-rt/0013-0004-doc-Update-RCU-s-requirements-page-about-the-PREEMPT.patch new file mode 100644 index 000000000..9e96cea34 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0013-0004-doc-Update-RCU-s-requirements-page-about-the-PREEMPT.patch @@ -0,0 +1,28 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 15 Dec 2020 15:16:48 +0100 +Subject: [PATCH 4/5] doc: Update RCU's requirements page about the PREEMPT_RT + wiki. + +The PREEMPT_RT wiki moved from kernel.org to the Linux Foundation wiki. +The kernel.org wiki is read only. + +This commit therefore updates the URL of the active PREEMPT_RT wiki. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/RCU/Design/Requirements/Requirements.rst | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/Documentation/RCU/Design/Requirements/Requirements.rst ++++ b/Documentation/RCU/Design/Requirements/Requirements.rst +@@ -2319,7 +2319,7 @@ decides to throw at it. + + The Linux kernel is used for real-time workloads, especially in + conjunction with the `-rt +-patchset `__. The ++patchset `__. The + real-time-latency response requirements are such that the traditional + approach of disabling preemption across RCU read-side critical sections + is inappropriate. Kernels built with ``CONFIG_PREEMPT=y`` therefore use diff --git a/kernel/patches-5.11.x-rt/0014-0005-doc-Use-CONFIG_PREEMPTION.patch b/kernel/patches-5.11.x-rt/0014-0005-doc-Use-CONFIG_PREEMPTION.patch new file mode 100644 index 000000000..34735954e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0014-0005-doc-Use-CONFIG_PREEMPTION.patch @@ -0,0 +1,233 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 15 Dec 2020 15:16:49 +0100 +Subject: [PATCH 5/5] doc: Use CONFIG_PREEMPTION + +CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by CONFIG_PREEMPT_RT. +Both PREEMPT and PREEMPT_RT require the same functionality which today +depends on CONFIG_PREEMPT. + +Update the documents and mention CONFIG_PREEMPTION. Spell out +CONFIG_PREEMPT_RT (instead PREEMPT_RT) since it is an option now. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst | 4 - + Documentation/RCU/Design/Requirements/Requirements.rst | 24 +++++----- + Documentation/RCU/checklist.rst | 2 + Documentation/RCU/rcubarrier.rst | 6 +- + Documentation/RCU/stallwarn.rst | 4 - + Documentation/RCU/whatisRCU.rst | 10 ++-- + 6 files changed, 25 insertions(+), 25 deletions(-) + +--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst ++++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst +@@ -38,7 +38,7 @@ sections. 
+ RCU-preempt Expedited Grace Periods + =================================== + +-``CONFIG_PREEMPT=y`` kernels implement RCU-preempt. ++``CONFIG_PREEMPTION=y`` kernels implement RCU-preempt. + The overall flow of the handling of a given CPU by an RCU-preempt + expedited grace period is shown in the following diagram: + +@@ -112,7 +112,7 @@ things. + RCU-sched Expedited Grace Periods + --------------------------------- + +-``CONFIG_PREEMPT=n`` kernels implement RCU-sched. The overall flow of ++``CONFIG_PREEMPTION=n`` kernels implement RCU-sched. The overall flow of + the handling of a given CPU by an RCU-sched expedited grace period is + shown in the following diagram: + +--- a/Documentation/RCU/Design/Requirements/Requirements.rst ++++ b/Documentation/RCU/Design/Requirements/Requirements.rst +@@ -78,7 +78,7 @@ RCU treats a nested set as one big RCU r + Production-quality implementations of ``rcu_read_lock()`` and + ``rcu_read_unlock()`` are extremely lightweight, and in fact have + exactly zero overhead in Linux kernels built for production use with +-``CONFIG_PREEMPT=n``. ++``CONFIG_PREEMPTION=n``. + + This guarantee allows ordering to be enforced with extremely low + overhead to readers, for example: +@@ -1182,7 +1182,7 @@ and has become decreasingly so as memory + costs have plummeted. However, as I learned from Matt Mackall's + `bloatwatch `__ efforts, memory + footprint is critically important on single-CPU systems with +-non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny ++non-preemptible (``CONFIG_PREEMPTION=n``) kernels, and thus `tiny + RCU `__ + was born. Josh Triplett has since taken over the small-memory banner + with his `Linux kernel tinification `__ +@@ -1498,7 +1498,7 @@ limitations. + + Implementations of RCU for which ``rcu_read_lock()`` and + ``rcu_read_unlock()`` generate no code, such as Linux-kernel RCU when +-``CONFIG_PREEMPT=n``, can be nested arbitrarily deeply. After all, there ++``CONFIG_PREEMPTION=n``, can be nested arbitrarily deeply. After all, there + is no overhead. Except that if all these instances of + ``rcu_read_lock()`` and ``rcu_read_unlock()`` are visible to the + compiler, compilation will eventually fail due to exhausting memory, +@@ -1771,7 +1771,7 @@ implementation can be a no-op. + + However, once the scheduler has spawned its first kthread, this early + boot trick fails for ``synchronize_rcu()`` (as well as for +-``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPT=y`` kernels. The ++``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPTION=y`` kernels. The + reason is that an RCU read-side critical section might be preempted, + which means that a subsequent ``synchronize_rcu()`` really does have to + wait for something, as opposed to simply returning immediately. +@@ -2040,7 +2040,7 @@ The compiler must not be permitted to tr + 5 rcu_read_unlock(); + 6 do_something_with(v, user_v); + +-If the compiler did make this transformation in a ``CONFIG_PREEMPT=n`` kernel ++If the compiler did make this transformation in a ``CONFIG_PREEMPTION=n`` kernel + build, and if ``get_user()`` did page fault, the result would be a quiescent + state in the middle of an RCU read-side critical section. This misplaced + quiescent state could result in line 4 being a use-after-free access, +@@ -2322,7 +2322,7 @@ conjunction with the `-rt + patchset `__. The + real-time-latency response requirements are such that the traditional + approach of disabling preemption across RCU read-side critical sections +-is inappropriate. 
Kernels built with ``CONFIG_PREEMPT=y`` therefore use ++is inappropriate. Kernels built with ``CONFIG_PREEMPTION=y`` therefore use + an RCU implementation that allows RCU read-side critical sections to be + preempted. This requirement made its presence known after users made it + clear that an earlier `real-time +@@ -2444,7 +2444,7 @@ includes ``rcu_read_lock_bh()``, ``rcu_r + ``call_rcu_bh()``, ``rcu_barrier_bh()``, and + ``rcu_read_lock_bh_held()``. However, the update-side APIs are now + simple wrappers for other RCU flavors, namely RCU-sched in +-CONFIG_PREEMPT=n kernels and RCU-preempt otherwise. ++CONFIG_PREEMPTION=n kernels and RCU-preempt otherwise. + + Sched Flavor (Historical) + ~~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -2462,11 +2462,11 @@ not have this property, given that any p + RCU read-side critical section can be a quiescent state. Therefore, + *RCU-sched* was created, which follows “classic” RCU in that an + RCU-sched grace period waits for pre-existing interrupt and NMI +-handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and ++handlers. In kernels built with ``CONFIG_PREEMPTION=n``, the RCU and + RCU-sched APIs have identical implementations, while kernels built with +-``CONFIG_PREEMPT=y`` provide a separate implementation for each. ++``CONFIG_PREEMPTION=y`` provide a separate implementation for each. + +-Note well that in ``CONFIG_PREEMPT=y`` kernels, ++Note well that in ``CONFIG_PREEMPTION=y`` kernels, + ``rcu_read_lock_sched()`` and ``rcu_read_unlock_sched()`` disable and + re-enable preemption, respectively. This means that if there was a + preemption attempt during the RCU-sched read-side critical section, +@@ -2629,10 +2629,10 @@ userspace execution also delimit tasks-R + + The tasks-RCU API is quite compact, consisting only of + ``call_rcu_tasks()``, ``synchronize_rcu_tasks()``, and +-``rcu_barrier_tasks()``. In ``CONFIG_PREEMPT=n`` kernels, trampolines ++``rcu_barrier_tasks()``. In ``CONFIG_PREEMPTION=n`` kernels, trampolines + cannot be preempted, so these APIs map to ``call_rcu()``, + ``synchronize_rcu()``, and ``rcu_barrier()``, respectively. In +-``CONFIG_PREEMPT=y`` kernels, trampolines can be preempted, and these ++``CONFIG_PREEMPTION=y`` kernels, trampolines can be preempted, and these + three APIs are therefore implemented by separate functions that check + for voluntary context switches. + +--- a/Documentation/RCU/checklist.rst ++++ b/Documentation/RCU/checklist.rst +@@ -214,7 +214,7 @@ over a rather long period of time, but i + the rest of the system. + + 7. As of v4.20, a given kernel implements only one RCU flavor, +- which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. ++ which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y. + If the updater uses call_rcu() or synchronize_rcu(), + then the corresponding readers my use rcu_read_lock() and + rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), +--- a/Documentation/RCU/rcubarrier.rst ++++ b/Documentation/RCU/rcubarrier.rst +@@ -9,7 +9,7 @@ RCU (read-copy update) is a synchronizat + of as a replacement for read-writer locking (among other things), but with + very low-overhead readers that are immune to deadlock, priority inversion, + and unbounded latency. RCU read-side critical sections are delimited +-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT ++by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPTION + kernels, generate no code whatsoever. 
+ + This means that RCU writers are unaware of the presence of concurrent +@@ -329,10 +329,10 @@ Answer: This cannot happen. The reason i + to smp_call_function() and further to smp_call_function_on_cpu(), + causing this latter to spin until the cross-CPU invocation of + rcu_barrier_func() has completed. This by itself would prevent +- a grace period from completing on non-CONFIG_PREEMPT kernels, ++ a grace period from completing on non-CONFIG_PREEMPTION kernels, + since each CPU must undergo a context switch (or other quiescent + state) before the grace period can complete. However, this is +- of no use in CONFIG_PREEMPT kernels. ++ of no use in CONFIG_PREEMPTION kernels. + + Therefore, on_each_cpu() disables preemption across its call + to smp_call_function() and also across the local call to +--- a/Documentation/RCU/stallwarn.rst ++++ b/Documentation/RCU/stallwarn.rst +@@ -25,7 +25,7 @@ So your kernel printed an RCU CPU stall + + - A CPU looping with bottom halves disabled. + +-- For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel ++- For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the kernel + without invoking schedule(). If the looping in the kernel is + really expected and desirable behavior, you might need to add + some calls to cond_resched(). +@@ -44,7 +44,7 @@ So your kernel printed an RCU CPU stall + result in the ``rcu_.*kthread starved for`` console-log message, + which will include additional debugging information. + +-- A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might ++- A CPU-bound real-time task in a CONFIG_PREEMPTION kernel, which might + happen to preempt a low-priority task in the middle of an RCU + read-side critical section. This is especially damaging if + that low-priority task is not permitted to run on any other CPU, +--- a/Documentation/RCU/whatisRCU.rst ++++ b/Documentation/RCU/whatisRCU.rst +@@ -683,7 +683,7 @@ so there can be no deadlock cycle. + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + This section presents a "toy" RCU implementation that is based on + "classic RCU". It is also short on performance (but only for updates) and +-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT ++on features such as hotplug CPU and the ability to run in CONFIG_PREEMPTION + kernels. The definitions of rcu_dereference() and rcu_assign_pointer() + are the same as those shown in the preceding section, so they are omitted. + :: +@@ -739,7 +739,7 @@ to that data item, so we can safely recl + Quick Quiz #3: + If it is illegal to block in an RCU read-side + critical section, what the heck do you do in +- PREEMPT_RT, where normal spinlocks can block??? ++ CONFIG_PREEMPT_RT, where normal spinlocks can block??? + + :ref:`Answers to Quick Quiz <8_whatisRCU>` + +@@ -1093,7 +1093,7 @@ the right tool for your job. + overhead is **negative**. + + Answer: +- Imagine a single-CPU system with a non-CONFIG_PREEMPT ++ Imagine a single-CPU system with a non-CONFIG_PREEMPTION + kernel where a routing table is used by process-context + code, but can be updated by irq-context code (for example, + by an "ICMP REDIRECT" packet). The usual way of handling +@@ -1120,10 +1120,10 @@ the right tool for your job. + Quick Quiz #3: + If it is illegal to block in an RCU read-side + critical section, what the heck do you do in +- PREEMPT_RT, where normal spinlocks can block??? ++ CONFIG_PREEMPT_RT, where normal spinlocks can block??? 
+ + Answer: +- Just as PREEMPT_RT permits preemption of spinlock ++ Just as CONFIG_PREEMPT_RT permits preemption of spinlock + critical sections, it permits preemption of RCU + read-side critical sections. It also permits + spinlocks blocking while in RCU read-side critical diff --git a/kernel/patches-5.11.x-rt/0015-0001-tracing-Merge-irqflags-preempt-counter.patch b/kernel/patches-5.11.x-rt/0015-0001-tracing-Merge-irqflags-preempt-counter.patch new file mode 100644 index 000000000..394633e8b --- /dev/null +++ b/kernel/patches-5.11.x-rt/0015-0001-tracing-Merge-irqflags-preempt-counter.patch @@ -0,0 +1,1861 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 25 Jan 2021 20:45:08 +0100 +Subject: [PATCH 1/4] tracing: Merge irqflags + preempt counter. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The state of the interrupts (irqflags) and the preemption counter are +both passed down to tracing_generic_entry_update(). Only one bit of +irqflags is actually required: The on/off state. The complete 32bit +of the preemption counter isn't needed. Just whether of the upper bits +(softirq, hardirq and NMI) are set and the preemption depth is needed. + +The irqflags and the preemption counter could be evaluated early and the +information stored in an integer `trace_ctx'. +tracing_generic_entry_update() would use the upper bits as the +TRACE_FLAG_* and the lower 8bit as the disabled-preemption depth +(considering that one must be substracted from the counter in one +special cases). + +The actual preemption value is not used except for the tracing record. +The `irqflags' variable is mostly used only for the tracing record. An +exception here is for instance wakeup_tracer_call() or +probe_wakeup_sched_switch() which explicilty disable interrupts and use +that `irqflags' to save (and restore) the IRQ state and to record the +state. + +Struct trace_event_buffer has also the `pc' and flags' members which can +be replaced with `trace_ctx' since their actual value is not used +outside of trace recording. + +This will reduce tracing_generic_entry_update() to simply assign values +to struct trace_entry. The evaluation of the TRACE_FLAG_* bits is moved +to _tracing_gen_ctx_flags() which replaces preempt_count() and +local_save_flags() invocations. + +As an example, ftrace_syscall_enter() may invoke: +- trace_buffer_lock_reserve() -> … -> tracing_generic_entry_update() +- event_trigger_unlock_commit() + -> ftrace_trace_stack() -> … -> tracing_generic_entry_update() + -> ftrace_trace_userstack() -> … -> tracing_generic_entry_update() + +In this case the TRACE_FLAG_* bits were evaluated three times. By using +the `trace_ctx' they are evaluated once and assigned three times. + +A build with all tracers enabled on x86-64 with and without the patch: + + text data bss dec hex filename +21970669 17084168 7639260 46694097 2c87ed1 vmlinux.old +21970293 17084168 7639260 46693721 2c87d59 vmlinux.new + +text shrank by 379 bytes, data remained constant. 
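The packing that replaces the (flags, pc) pair can be sketched as follows (illustrative helpers, not the kernel's API; the bit layout matches the new tracing_generic_entry_update() in the hunks below — preemption depth in the low byte, TRACE_FLAG_* bits from bit 16 up):

static inline unsigned int pack_trace_ctx(unsigned char trace_flags,
					  unsigned char preempt_depth)
{
	return ((unsigned int)trace_flags << 16) | preempt_depth;
}

static inline void unpack_trace_ctx(unsigned int trace_ctx,
				    unsigned char *preempt_depth,
				    unsigned char *trace_flags)
{
	*preempt_depth = trace_ctx & 0xff;	/* low byte: preemption depth */
	*trace_flags = trace_ctx >> 16;		/* TRACE_FLAG_* state bits */
}
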
+ +Link: https://lkml.kernel.org/r/20210125194511.3924915-2-bigeasy@linutronix.de + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/trace_events.h | 25 +++- + kernel/trace/blktrace.c | 17 +- + kernel/trace/trace.c | 206 ++++++++++++++++++----------------- + kernel/trace/trace.h | 38 ++---- + kernel/trace/trace_branch.c | 6 - + kernel/trace/trace_event_perf.c | 5 + kernel/trace/trace_events.c | 18 +-- + kernel/trace/trace_events_inject.c | 6 - + kernel/trace/trace_functions.c | 28 ++-- + kernel/trace/trace_functions_graph.c | 32 ++--- + kernel/trace/trace_hwlat.c | 7 - + kernel/trace/trace_irqsoff.c | 86 ++++++-------- + kernel/trace/trace_kprobe.c | 10 - + kernel/trace/trace_mmiotrace.c | 14 +- + kernel/trace/trace_sched_wakeup.c | 71 +++++------- + kernel/trace/trace_syscalls.c | 20 +-- + kernel/trace/trace_uprobe.c | 4 + 17 files changed, 286 insertions(+), 307 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -148,17 +148,29 @@ enum print_line_t { + + enum print_line_t trace_handle_return(struct trace_seq *s); + +-void tracing_generic_entry_update(struct trace_entry *entry, +- unsigned short type, +- unsigned long flags, +- int pc); ++static inline void tracing_generic_entry_update(struct trace_entry *entry, ++ unsigned short type, ++ unsigned int trace_ctx) ++{ ++ struct task_struct *tsk = current; ++ ++ entry->preempt_count = trace_ctx & 0xff; ++ entry->pid = (tsk) ? tsk->pid : 0; ++ entry->type = type; ++ entry->flags = trace_ctx >> 16; ++} ++ ++unsigned int tracing_gen_ctx_flags(unsigned long irqflags); ++unsigned int tracing_gen_ctx(void); ++unsigned int tracing_gen_ctx_dec(void); ++ + struct trace_event_file; + + struct ring_buffer_event * + trace_event_buffer_lock_reserve(struct trace_buffer **current_buffer, + struct trace_event_file *trace_file, + int type, unsigned long len, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #define TRACE_RECORD_CMDLINE BIT(0) + #define TRACE_RECORD_TGID BIT(1) +@@ -232,8 +244,7 @@ struct trace_event_buffer { + struct ring_buffer_event *event; + struct trace_event_file *trace_file; + void *entry; +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + struct pt_regs *regs; + }; + +--- a/kernel/trace/blktrace.c ++++ b/kernel/trace/blktrace.c +@@ -72,17 +72,17 @@ static void trace_note(struct blk_trace + struct blk_io_trace *t; + struct ring_buffer_event *event = NULL; + struct trace_buffer *buffer = NULL; +- int pc = 0; ++ unsigned int trace_ctx = 0; + int cpu = smp_processor_id(); + bool blk_tracer = blk_tracer_enabled; + ssize_t cgid_len = cgid ? 
sizeof(cgid) : 0; + + if (blk_tracer) { + buffer = blk_tr->array_buffer.buffer; +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, + sizeof(*t) + len + cgid_len, +- 0, pc); ++ trace_ctx); + if (!event) + return; + t = ring_buffer_event_data(event); +@@ -107,7 +107,7 @@ static void trace_note(struct blk_trace + memcpy((void *) t + sizeof(*t) + cgid_len, data, len); + + if (blk_tracer) +- trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); + } + } + +@@ -222,8 +222,9 @@ static void __blk_add_trace(struct blk_t + struct blk_io_trace *t; + unsigned long flags = 0; + unsigned long *sequence; ++ unsigned int trace_ctx = 0; + pid_t pid; +- int cpu, pc = 0; ++ int cpu; + bool blk_tracer = blk_tracer_enabled; + ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + +@@ -252,10 +253,10 @@ static void __blk_add_trace(struct blk_t + tracing_record_cmdline(current); + + buffer = blk_tr->array_buffer.buffer; +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_BLK, + sizeof(*t) + pdu_len + cgid_len, +- 0, pc); ++ trace_ctx); + if (!event) + return; + t = ring_buffer_event_data(event); +@@ -301,7 +302,7 @@ static void __blk_add_trace(struct blk_t + memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); + + if (blk_tracer) { +- trace_buffer_unlock_commit(blk_tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); + return; + } + } +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -176,7 +176,7 @@ static union trace_eval_map_item *trace_ + int tracing_set_tracer(struct trace_array *tr, const char *buf); + static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #define MAX_TRACER_SIZE 100 + static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; +@@ -905,23 +905,23 @@ static inline void trace_access_lock_ini + + #ifdef CONFIG_STACKTRACE + static void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs); ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs); + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs); ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs); + + #else + static inline void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + } + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned long trace_ctx, ++ int skip, struct pt_regs *regs) + { + } + +@@ -929,24 +929,24 @@ static inline void ftrace_trace_stack(st + + static __always_inline void + trace_event_setup(struct ring_buffer_event *event, +- int type, unsigned long flags, int pc) ++ int type, unsigned int trace_ctx) + { + struct trace_entry *ent = ring_buffer_event_data(event); + +- tracing_generic_entry_update(ent, type, flags, pc); ++ tracing_generic_entry_update(ent, type, trace_ctx); + } + + static __always_inline struct ring_buffer_event * + __trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, int 
pc) ++ unsigned int trace_ctx) + { + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(buffer, len); + if (event != NULL) +- trace_event_setup(event, type, flags, pc); ++ trace_event_setup(event, type, trace_ctx); + + return event; + } +@@ -1007,25 +1007,22 @@ int __trace_puts(unsigned long ip, const + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct print_entry *entry; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + int alloc; +- int pc; + + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + return 0; + +- pc = preempt_count(); +- + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + + alloc = sizeof(*entry) + size + 2; /* possible \n added */ + +- local_save_flags(irq_flags); ++ trace_ctx = tracing_gen_ctx(); + buffer = global_trace.array_buffer.buffer; + ring_buffer_nest_start(buffer); +- event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, +- irq_flags, pc); ++ event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, ++ trace_ctx); + if (!event) { + size = 0; + goto out; +@@ -1044,7 +1041,7 @@ int __trace_puts(unsigned long ip, const + entry->buf[size] = '\0'; + + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); + out: + ring_buffer_nest_end(buffer); + return size; +@@ -1061,25 +1058,22 @@ int __trace_bputs(unsigned long ip, cons + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct bputs_entry *entry; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + int size = sizeof(struct bputs_entry); + int ret = 0; +- int pc; + + if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + return 0; + +- pc = preempt_count(); +- + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; + +- local_save_flags(irq_flags); ++ trace_ctx = tracing_gen_ctx(); + buffer = global_trace.array_buffer.buffer; + + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, +- irq_flags, pc); ++ trace_ctx); + if (!event) + goto out; + +@@ -1088,7 +1082,7 @@ int __trace_bputs(unsigned long ip, cons + entry->str = str; + + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); + + ret = 1; + out: +@@ -2584,36 +2578,69 @@ enum print_line_t trace_handle_return(st + } + EXPORT_SYMBOL_GPL(trace_handle_return); + +-void +-tracing_generic_entry_update(struct trace_entry *entry, unsigned short type, +- unsigned long flags, int pc) ++unsigned int tracing_gen_ctx_flags(unsigned long irqflags) + { +- struct task_struct *tsk = current; ++ unsigned int trace_flags = 0; ++ unsigned int pc; ++ ++ pc = preempt_count(); + +- entry->preempt_count = pc & 0xff; +- entry->pid = (tsk) ? tsk->pid : 0; +- entry->type = type; +- entry->flags = + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +- (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | ++ if (irqs_disabled_flags(irqflags)) ++ trace_flags |= TRACE_FLAG_IRQS_OFF; + #else +- TRACE_FLAG_IRQS_NOSUPPORT | ++ trace_flags |= TRACE_FLAG_IRQS_NOSUPPORT; + #endif +- ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | +- ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | +- ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | +- (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | +- (test_preempt_need_resched() ? 
TRACE_FLAG_PREEMPT_RESCHED : 0); ++ ++ if (pc & NMI_MASK) ++ trace_flags |= TRACE_FLAG_NMI; ++ if (pc & HARDIRQ_MASK) ++ trace_flags |= TRACE_FLAG_HARDIRQ; ++ ++ if (pc & SOFTIRQ_OFFSET) ++ trace_flags |= TRACE_FLAG_SOFTIRQ; ++ ++ if (tif_need_resched()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED; ++ if (test_preempt_need_resched()) ++ trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; ++ return (trace_flags << 16) | (pc & 0xff); ++} ++ ++unsigned int tracing_gen_ctx(void) ++{ ++ unsigned long irqflags; ++ ++#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT ++ local_save_flags(irqflags); ++#else ++ irqflags = 0; ++#endif ++ return tracing_gen_ctx_flags(irqflags); ++} ++ ++unsigned int tracing_gen_ctx_dec(void) ++{ ++ unsigned int trace_ctx; ++ ++ trace_ctx = tracing_gen_ctx(); ++ ++ /* ++ * Subtract one from the preeption counter if preemption is enabled, ++ * see trace_event_buffer_reserve()for details. ++ */ ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ trace_ctx--; ++ return trace_ctx; + } +-EXPORT_SYMBOL_GPL(tracing_generic_entry_update); + + struct ring_buffer_event * + trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); ++ return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx); + } + + DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +@@ -2733,7 +2760,7 @@ struct ring_buffer_event * + trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, + struct trace_event_file *trace_file, + int type, unsigned long len, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct ring_buffer_event *entry; + int val; +@@ -2746,7 +2773,7 @@ trace_event_buffer_lock_reserve(struct t + /* Try to use the per cpu buffer first */ + val = this_cpu_inc_return(trace_buffered_event_cnt); + if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) { +- trace_event_setup(entry, type, flags, pc); ++ trace_event_setup(entry, type, trace_ctx); + entry->array[0] = len; + return entry; + } +@@ -2754,7 +2781,7 @@ trace_event_buffer_lock_reserve(struct t + } + + entry = __trace_buffer_lock_reserve(*current_rb, +- type, len, flags, pc); ++ type, len, trace_ctx); + /* + * If tracing is off, but we have triggers enabled + * we still need to look at the event data. Use the temp_buffer +@@ -2763,8 +2790,8 @@ trace_event_buffer_lock_reserve(struct t + */ + if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { + *current_rb = temp_buffer; +- entry = __trace_buffer_lock_reserve(*current_rb, +- type, len, flags, pc); ++ entry = __trace_buffer_lock_reserve(*current_rb, type, len, ++ trace_ctx); + } + return entry; + } +@@ -2850,7 +2877,7 @@ void trace_event_buffer_commit(struct tr + ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); + event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, + fbuffer->event, fbuffer->entry, +- fbuffer->flags, fbuffer->pc, fbuffer->regs); ++ fbuffer->trace_ctx, fbuffer->regs); + } + EXPORT_SYMBOL_GPL(trace_event_buffer_commit); + +@@ -2866,7 +2893,7 @@ EXPORT_SYMBOL_GPL(trace_event_buffer_com + void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc, ++ unsigned int trace_ctx, + struct pt_regs *regs) + { + __buffer_unlock_commit(buffer, event); +@@ -2877,8 +2904,8 @@ void trace_buffer_unlock_commit_regs(str + * and mmiotrace, but that's ok if they lose a function or + * two. 
They are not that meaningful. + */ +- ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); +- ftrace_trace_userstack(tr, buffer, flags, pc); ++ ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs); ++ ftrace_trace_userstack(tr, buffer, trace_ctx); + } + + /* +@@ -2892,9 +2919,8 @@ trace_buffer_unlock_commit_nostack(struc + } + + void +-trace_function(struct trace_array *tr, +- unsigned long ip, unsigned long parent_ip, unsigned long flags, +- int pc) ++trace_function(struct trace_array *tr, unsigned long ip, unsigned long ++ parent_ip, unsigned int trace_ctx) + { + struct trace_event_call *call = &event_function; + struct trace_buffer *buffer = tr->array_buffer.buffer; +@@ -2902,7 +2928,7 @@ trace_function(struct trace_array *tr, + struct ftrace_entry *entry; + + event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), +- flags, pc); ++ trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -2936,8 +2962,8 @@ static DEFINE_PER_CPU(struct ftrace_stac + static DEFINE_PER_CPU(int, ftrace_stack_reserve); + + static void __ftrace_trace_stack(struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + struct trace_event_call *call = &event_kernel_stack; + struct ring_buffer_event *event; +@@ -2984,7 +3010,7 @@ static void __ftrace_trace_stack(struct + + size = nr_entries * sizeof(unsigned long); + event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, +- sizeof(*entry) + size, flags, pc); ++ sizeof(*entry) + size, trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3005,22 +3031,22 @@ static void __ftrace_trace_stack(struct + + static inline void ftrace_trace_stack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, +- int skip, int pc, struct pt_regs *regs) ++ unsigned int trace_ctx, ++ int skip, struct pt_regs *regs) + { + if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) + return; + +- __ftrace_trace_stack(buffer, flags, skip, pc, regs); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, regs); + } + +-void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, +- int pc) ++void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, ++ int skip) + { + struct trace_buffer *buffer = tr->array_buffer.buffer; + + if (rcu_is_watching()) { +- __ftrace_trace_stack(buffer, flags, skip, pc, NULL); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); + return; + } + +@@ -3034,7 +3060,7 @@ void __trace_stack(struct trace_array *t + return; + + rcu_irq_enter_irqson(); +- __ftrace_trace_stack(buffer, flags, skip, pc, NULL); ++ __ftrace_trace_stack(buffer, trace_ctx, skip, NULL); + rcu_irq_exit_irqson(); + } + +@@ -3044,19 +3070,15 @@ void __trace_stack(struct trace_array *t + */ + void trace_dump_stack(int skip) + { +- unsigned long flags; +- + if (tracing_disabled || tracing_selftest_running) + return; + +- local_save_flags(flags); +- + #ifndef CONFIG_UNWINDER_ORC + /* Skip 1 to skip this function. 
*/ + skip++; + #endif + __ftrace_trace_stack(global_trace.array_buffer.buffer, +- flags, skip, preempt_count(), NULL); ++ tracing_gen_ctx(), skip, NULL); + } + EXPORT_SYMBOL_GPL(trace_dump_stack); + +@@ -3065,7 +3087,7 @@ static DEFINE_PER_CPU(int, user_stack_co + + static void + ftrace_trace_userstack(struct trace_array *tr, +- struct trace_buffer *buffer, unsigned long flags, int pc) ++ struct trace_buffer *buffer, unsigned int trace_ctx) + { + struct trace_event_call *call = &event_user_stack; + struct ring_buffer_event *event; +@@ -3092,7 +3114,7 @@ ftrace_trace_userstack(struct trace_arra + __this_cpu_inc(user_stack_count); + + event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + goto out_drop_count; + entry = ring_buffer_event_data(event); +@@ -3112,7 +3134,7 @@ ftrace_trace_userstack(struct trace_arra + #else /* CONFIG_USER_STACKTRACE_SUPPORT */ + static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_buffer *buffer, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + } + #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */ +@@ -3242,9 +3264,9 @@ int trace_vbprintk(unsigned long ip, con + struct trace_buffer *buffer; + struct trace_array *tr = &global_trace; + struct bprint_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + char *tbuffer; +- int len = 0, size, pc; ++ int len = 0, size; + + if (unlikely(tracing_selftest_running || tracing_disabled)) + return 0; +@@ -3252,7 +3274,7 @@ int trace_vbprintk(unsigned long ip, con + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + tbuffer = get_trace_buf(); +@@ -3266,12 +3288,11 @@ int trace_vbprintk(unsigned long ip, con + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out_put; + +- local_save_flags(flags); + size = sizeof(*entry) + sizeof(u32) * len; + buffer = tr->array_buffer.buffer; + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3281,7 +3302,7 @@ int trace_vbprintk(unsigned long ip, con + memcpy(entry->buf, tbuffer, sizeof(u32) * len); + if (!call_filter_check_discard(call, entry, buffer, event)) { + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); ++ ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL); + } + + out: +@@ -3304,9 +3325,9 @@ static int + { + struct trace_event_call *call = &event_print; + struct ring_buffer_event *event; +- int len = 0, size, pc; ++ int len = 0, size; + struct print_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + char *tbuffer; + + if (tracing_disabled || tracing_selftest_running) +@@ -3315,7 +3336,7 @@ static int + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + +@@ -3327,11 +3348,10 @@ static int + + len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); + +- local_save_flags(flags); + size = sizeof(*entry) + len + 1; + ring_buffer_nest_start(buffer); + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3340,7 +3360,7 @@ static int + memcpy(&entry->buf, tbuffer, len + 1); + if 
(!call_filter_check_discard(call, entry, buffer, event)) { + __buffer_unlock_commit(buffer, event); +- ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); ++ ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL); + } + + out: +@@ -6653,7 +6673,6 @@ tracing_mark_write(struct file *filp, co + enum event_trigger_type tt = ETT_NONE; + struct trace_buffer *buffer; + struct print_entry *entry; +- unsigned long irq_flags; + ssize_t written; + int size; + int len; +@@ -6673,7 +6692,6 @@ tracing_mark_write(struct file *filp, co + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + +- local_save_flags(irq_flags); + size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */ + + /* If less than "", then make sure we can still add that */ +@@ -6682,7 +6700,7 @@ tracing_mark_write(struct file *filp, co + + buffer = tr->array_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, +- irq_flags, preempt_count()); ++ tracing_gen_ctx()); + if (unlikely(!event)) + /* Ring buffer disabled, return as if not open for write */ + return -EBADF; +@@ -6734,7 +6752,6 @@ tracing_mark_raw_write(struct file *filp + struct ring_buffer_event *event; + struct trace_buffer *buffer; + struct raw_data_entry *entry; +- unsigned long irq_flags; + ssize_t written; + int size; + int len; +@@ -6756,14 +6773,13 @@ tracing_mark_raw_write(struct file *filp + + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); + +- local_save_flags(irq_flags); + size = sizeof(*entry) + cnt; + if (cnt < FAULT_SIZE_ID) + size += FAULT_SIZE_ID - cnt; + + buffer = tr->array_buffer.buffer; + event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size, +- irq_flags, preempt_count()); ++ tracing_gen_ctx()); + if (!event) + /* Ring buffer disabled, return as if not open for write */ + return -EBADF; +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -589,8 +589,7 @@ struct ring_buffer_event * + trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, + unsigned long len, +- unsigned long flags, +- int pc); ++ unsigned int trace_ctx); + + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data); +@@ -615,11 +614,11 @@ unsigned long trace_total_entries(struct + void trace_function(struct trace_array *tr, + unsigned long ip, + unsigned long parent_ip, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + void trace_graph_function(struct trace_array *tr, + unsigned long ip, + unsigned long parent_ip, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + void trace_latency_header(struct seq_file *m); + void trace_default_header(struct seq_file *m); + void print_trace_header(struct seq_file *m, struct trace_iterator *iter); +@@ -687,11 +686,10 @@ static inline void latency_fsnotify(stru + #endif + + #ifdef CONFIG_STACKTRACE +-void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, +- int pc); ++void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, int skip); + #else +-static inline void __trace_stack(struct trace_array *tr, unsigned long flags, +- int skip, int pc) ++static inline void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, ++ int skip) + { + } + #endif /* CONFIG_STACKTRACE */ +@@ -831,10 +829,10 @@ extern void graph_trace_open(struct trac + extern void graph_trace_close(struct trace_iterator *iter); + extern int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + extern void __trace_graph_return(struct 
trace_array *tr, + struct ftrace_graph_ret *trace, +- unsigned long flags, int pc); ++ unsigned int trace_ctx); + + #ifdef CONFIG_DYNAMIC_FTRACE + extern struct ftrace_hash __rcu *ftrace_graph_hash; +@@ -1297,15 +1295,15 @@ extern int call_filter_check_discard(str + void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc, ++ unsigned int trcace_ctx, + struct pt_regs *regs); + + static inline void trace_buffer_unlock_commit(struct trace_array *tr, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- trace_buffer_unlock_commit_regs(tr, buffer, event, flags, pc, NULL); ++ trace_buffer_unlock_commit_regs(tr, buffer, event, trace_ctx, NULL); + } + + DECLARE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +@@ -1366,8 +1364,7 @@ static inline bool + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself +- * @irq_flags: The state of the interrupts at the start of the event +- * @pc: The state of the preempt count at the start of the event. ++ * @trace_ctx: The tracing context flags. + * + * This is a helper function to handle triggers that require data + * from the event itself. It also tests the event against filters and +@@ -1377,12 +1374,12 @@ static inline void + event_trigger_unlock_commit(struct trace_event_file *file, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- void *entry, unsigned long irq_flags, int pc) ++ void *entry, unsigned int trace_ctx) + { + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) +- trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); ++ trace_buffer_unlock_commit(file->tr, buffer, event, trace_ctx); + + if (tt) + event_triggers_post_call(file, tt); +@@ -1394,8 +1391,7 @@ event_trigger_unlock_commit(struct trace + * @buffer: The ring buffer that the event is being written to + * @event: The event meta data in the ring buffer + * @entry: The event itself +- * @irq_flags: The state of the interrupts at the start of the event +- * @pc: The state of the preempt count at the start of the event. ++ * @trace_ctx: The tracing context flags. + * + * This is a helper function to handle triggers that require data + * from the event itself. 
It also tests the event against filters and +@@ -1408,14 +1404,14 @@ static inline void + event_trigger_unlock_commit_regs(struct trace_event_file *file, + struct trace_buffer *buffer, + struct ring_buffer_event *event, +- void *entry, unsigned long irq_flags, int pc, ++ void *entry, unsigned int trace_ctx, + struct pt_regs *regs) + { + enum event_trigger_type tt = ETT_NONE; + + if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) + trace_buffer_unlock_commit_regs(file->tr, buffer, event, +- irq_flags, pc, regs); ++ trace_ctx, regs); + + if (tt) + event_triggers_post_call(file, tt); +--- a/kernel/trace/trace_branch.c ++++ b/kernel/trace/trace_branch.c +@@ -37,7 +37,7 @@ probe_likely_condition(struct ftrace_lik + struct ring_buffer_event *event; + struct trace_branch *entry; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + const char *p; + + if (current->trace_recursion & TRACE_BRANCH_BIT) +@@ -59,10 +59,10 @@ probe_likely_condition(struct ftrace_lik + if (atomic_read(&data->disabled)) + goto out; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx_flags(flags); + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + goto out; + +--- a/kernel/trace/trace_event_perf.c ++++ b/kernel/trace/trace_event_perf.c +@@ -421,11 +421,8 @@ NOKPROBE_SYMBOL(perf_trace_buf_alloc); + void perf_trace_buf_update(void *record, u16 type) + { + struct trace_entry *entry = record; +- int pc = preempt_count(); +- unsigned long flags; + +- local_save_flags(flags); +- tracing_generic_entry_update(entry, type, flags, pc); ++ tracing_generic_entry_update(entry, type, tracing_gen_ctx()); + } + NOKPROBE_SYMBOL(perf_trace_buf_update); + +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -258,22 +258,19 @@ void *trace_event_buffer_reserve(struct + trace_event_ignore_this_pid(trace_file)) + return NULL; + +- local_save_flags(fbuffer->flags); +- fbuffer->pc = preempt_count(); + /* + * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables + * preemption (adding one to the preempt_count). Since we are + * interested in the preempt_count at the time the tracepoint was + * hit, we need to subtract one to offset the increment. 
+ */ +- if (IS_ENABLED(CONFIG_PREEMPTION)) +- fbuffer->pc--; ++ fbuffer->trace_ctx = tracing_gen_ctx_dec(); + fbuffer->trace_file = trace_file; + + fbuffer->event = + trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, + event_call->event.type, len, +- fbuffer->flags, fbuffer->pc); ++ fbuffer->trace_ctx); + if (!fbuffer->event) + return NULL; + +@@ -3679,12 +3676,11 @@ function_test_events_call(unsigned long + struct trace_buffer *buffer; + struct ring_buffer_event *event; + struct ftrace_entry *entry; +- unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int cpu; +- int pc; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); +@@ -3692,11 +3688,9 @@ function_test_events_call(unsigned long + if (disabled != 1) + goto out; + +- local_save_flags(flags); +- + event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file, + TRACE_FN, sizeof(*entry), +- flags, pc); ++ trace_ctx); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +@@ -3704,7 +3698,7 @@ function_test_events_call(unsigned long + entry->parent_ip = parent_ip; + + event_trigger_unlock_commit(&event_trace_file, buffer, event, +- entry, flags, pc); ++ entry, trace_ctx); + out: + atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); + preempt_enable_notrace(); +--- a/kernel/trace/trace_events_inject.c ++++ b/kernel/trace/trace_events_inject.c +@@ -192,7 +192,6 @@ static void *trace_alloc_entry(struct tr + static int parse_entry(char *str, struct trace_event_call *call, void **pentry) + { + struct ftrace_event_field *field; +- unsigned long irq_flags; + void *entry = NULL; + int entry_size; + u64 val = 0; +@@ -203,9 +202,8 @@ static int parse_entry(char *str, struct + if (!entry) + return -ENOMEM; + +- local_save_flags(irq_flags); +- tracing_generic_entry_update(entry, call->event.type, irq_flags, +- preempt_count()); ++ tracing_generic_entry_update(entry, call->event.type, ++ tracing_gen_ctx()); + + while ((len = parse_field(str, call, &field, &val)) > 0) { + if (is_function_field(field)) +--- a/kernel/trace/trace_functions.c ++++ b/kernel/trace/trace_functions.c +@@ -132,10 +132,9 @@ function_trace_call(unsigned long ip, un + { + struct trace_array *tr = op->private; + struct trace_array_cpu *data; +- unsigned long flags; ++ unsigned int trace_ctx; + int bit; + int cpu; +- int pc; + + if (unlikely(!tr->function_enabled)) + return; +@@ -144,15 +143,14 @@ function_trace_call(unsigned long ip, un + if (bit < 0) + return; + +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + cpu = smp_processor_id(); + data = per_cpu_ptr(tr->array_buffer.data, cpu); +- if (!atomic_read(&data->disabled)) { +- local_save_flags(flags); +- trace_function(tr, ip, parent_ip, flags, pc); +- } ++ if (!atomic_read(&data->disabled)) ++ trace_function(tr, ip, parent_ip, trace_ctx); ++ + ftrace_test_recursion_unlock(bit); + preempt_enable_notrace(); + } +@@ -184,7 +182,7 @@ function_stack_trace_call(unsigned long + unsigned long flags; + long disabled; + int cpu; +- int pc; ++ unsigned int trace_ctx; + + if (unlikely(!tr->function_enabled)) + return; +@@ -199,9 +197,9 @@ function_stack_trace_call(unsigned long + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) { +- pc = preempt_count(); +- trace_function(tr, ip, parent_ip, flags, pc); +- __trace_stack(tr, flags, STACK_SKIP, pc); ++ trace_ctx = 
tracing_gen_ctx_flags(flags); ++ trace_function(tr, ip, parent_ip, trace_ctx); ++ __trace_stack(tr, trace_ctx, STACK_SKIP); + } + + atomic_dec(&data->disabled); +@@ -404,13 +402,11 @@ ftrace_traceoff(unsigned long ip, unsign + + static __always_inline void trace_stack(struct trace_array *tr) + { +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + +- local_save_flags(flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + +- __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); ++ __trace_stack(tr, trace_ctx, FTRACE_STACK_SKIP); + } + + static void +--- a/kernel/trace/trace_functions_graph.c ++++ b/kernel/trace/trace_functions_graph.c +@@ -96,8 +96,7 @@ print_graph_duration(struct trace_array + + int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, +- unsigned long flags, +- int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_funcgraph_entry; + struct ring_buffer_event *event; +@@ -105,7 +104,7 @@ int __trace_graph_entry(struct trace_arr + struct ftrace_graph_ent_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return 0; + entry = ring_buffer_event_data(event); +@@ -129,10 +128,10 @@ int trace_graph_entry(struct ftrace_grap + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int ret; + int cpu; +- int pc; + + if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) + return 0; +@@ -174,8 +173,8 @@ int trace_graph_entry(struct ftrace_grap + data = per_cpu_ptr(tr->array_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { +- pc = preempt_count(); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + } else { + ret = 0; + } +@@ -188,7 +187,7 @@ int trace_graph_entry(struct ftrace_grap + + static void + __trace_graph_function(struct trace_array *tr, +- unsigned long ip, unsigned long flags, int pc) ++ unsigned long ip, unsigned int trace_ctx) + { + u64 time = trace_clock_local(); + struct ftrace_graph_ent ent = { +@@ -202,22 +201,21 @@ static void + .rettime = time, + }; + +- __trace_graph_entry(tr, &ent, flags, pc); +- __trace_graph_return(tr, &ret, flags, pc); ++ __trace_graph_entry(tr, &ent, trace_ctx); ++ __trace_graph_return(tr, &ret, trace_ctx); + } + + void + trace_graph_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { +- __trace_graph_function(tr, ip, flags, pc); ++ __trace_graph_function(tr, ip, trace_ctx); + } + + void __trace_graph_return(struct trace_array *tr, + struct ftrace_graph_ret *trace, +- unsigned long flags, +- int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_funcgraph_exit; + struct ring_buffer_event *event; +@@ -225,7 +223,7 @@ void __trace_graph_return(struct trace_a + struct ftrace_graph_ret_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -239,9 +237,9 @@ void trace_graph_return(struct ftrace_gr + struct trace_array *tr = graph_array; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + long disabled; + int cpu; +- int pc; + + ftrace_graph_addr_finish(trace); 
+ +@@ -255,8 +253,8 @@ void trace_graph_return(struct ftrace_gr + data = per_cpu_ptr(tr->array_buffer.data, cpu); + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { +- pc = preempt_count(); +- __trace_graph_return(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ __trace_graph_return(tr, trace, trace_ctx); + } + atomic_dec(&data->disabled); + local_irq_restore(flags); +--- a/kernel/trace/trace_hwlat.c ++++ b/kernel/trace/trace_hwlat.c +@@ -108,14 +108,9 @@ static void trace_hwlat_sample(struct hw + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct hwlat_entry *entry; +- unsigned long flags; +- int pc; +- +- pc = preempt_count(); +- local_save_flags(flags); + + event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry), +- flags, pc); ++ tracing_gen_ctx()); + if (!event) + return; + entry = ring_buffer_event_data(event); +--- a/kernel/trace/trace_irqsoff.c ++++ b/kernel/trace/trace_irqsoff.c +@@ -143,11 +143,14 @@ irqsoff_tracer_call(unsigned long ip, un + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + + if (!func_prolog_dec(tr, &data, &flags)) + return; + +- trace_function(tr, ip, parent_ip, flags, preempt_count()); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ++ trace_function(tr, ip, parent_ip, trace_ctx); + + atomic_dec(&data->disabled); + } +@@ -177,8 +180,8 @@ static int irqsoff_graph_entry(struct ft + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; ++ unsigned int trace_ctx; + int ret; +- int pc; + + if (ftrace_graph_ignore_func(trace)) + return 0; +@@ -195,8 +198,8 @@ static int irqsoff_graph_entry(struct ft + if (!func_prolog_dec(tr, &data, &flags)) + return 0; + +- pc = preempt_count(); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + + return ret; +@@ -207,15 +210,15 @@ static void irqsoff_graph_return(struct + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + ftrace_graph_addr_finish(trace); + + if (!func_prolog_dec(tr, &data, &flags)) + return; + +- pc = preempt_count(); +- __trace_graph_return(tr, trace, flags, pc); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ __trace_graph_return(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + } + +@@ -267,12 +270,12 @@ static void irqsoff_print_header(struct + static void + __trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + if (is_graph(tr)) +- trace_graph_function(tr, ip, parent_ip, flags, pc); ++ trace_graph_function(tr, ip, parent_ip, trace_ctx); + else +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + } + + #else +@@ -322,15 +325,13 @@ check_critical_timing(struct trace_array + { + u64 T0, T1, delta; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + T0 = data->preempt_timestamp; + T1 = ftrace_now(cpu); + delta = T1-T0; + +- local_save_flags(flags); +- +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + if (!report_latency(tr, delta)) + goto out; +@@ -341,9 +342,9 @@ check_critical_timing(struct trace_array + if (!report_latency(tr, delta)) + goto out_unlock; + +- __trace_function(tr, CALLER_ADDR0, 
parent_ip, flags, pc); ++ __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); + /* Skip 5 functions to get to the irq/preempt enable function */ +- __trace_stack(tr, flags, 5, pc); ++ __trace_stack(tr, trace_ctx, 5); + + if (data->critical_sequence != max_sequence) + goto out_unlock; +@@ -363,16 +364,15 @@ check_critical_timing(struct trace_array + out: + data->critical_sequence = max_sequence; + data->preempt_timestamp = ftrace_now(cpu); +- __trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); ++ __trace_function(tr, CALLER_ADDR0, parent_ip, trace_ctx); + } + + static nokprobe_inline void +-start_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) ++start_critical_timing(unsigned long ip, unsigned long parent_ip) + { + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; +- unsigned long flags; + + if (!tracer_enabled || !tracing_is_enabled()) + return; +@@ -393,9 +393,7 @@ start_critical_timing(unsigned long ip, + data->preempt_timestamp = ftrace_now(cpu); + data->critical_start = parent_ip ? : ip; + +- local_save_flags(flags); +- +- __trace_function(tr, ip, parent_ip, flags, pc); ++ __trace_function(tr, ip, parent_ip, tracing_gen_ctx()); + + per_cpu(tracing_cpu, cpu) = 1; + +@@ -403,12 +401,12 @@ start_critical_timing(unsigned long ip, + } + + static nokprobe_inline void +-stop_critical_timing(unsigned long ip, unsigned long parent_ip, int pc) ++stop_critical_timing(unsigned long ip, unsigned long parent_ip) + { + int cpu; + struct trace_array *tr = irqsoff_trace; + struct trace_array_cpu *data; +- unsigned long flags; ++ unsigned int trace_ctx; + + cpu = raw_smp_processor_id(); + /* Always clear the tracing cpu on stopping the trace */ +@@ -428,8 +426,8 @@ stop_critical_timing(unsigned long ip, u + + atomic_inc(&data->disabled); + +- local_save_flags(flags); +- __trace_function(tr, ip, parent_ip, flags, pc); ++ trace_ctx = tracing_gen_ctx(); ++ __trace_function(tr, ip, parent_ip, trace_ctx); + check_critical_timing(tr, data, parent_ip ? 
: ip, cpu); + data->critical_start = 0; + atomic_dec(&data->disabled); +@@ -438,20 +436,16 @@ stop_critical_timing(unsigned long ip, u + /* start and stop critical timings used to for stoppage (in idle) */ + void start_critical_timings(void) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) || irq_trace()) +- start_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); ++ if (preempt_trace(preempt_count()) || irq_trace()) ++ start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } + EXPORT_SYMBOL_GPL(start_critical_timings); + NOKPROBE_SYMBOL(start_critical_timings); + + void stop_critical_timings(void) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) || irq_trace()) +- stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1, pc); ++ if (preempt_trace(preempt_count()) || irq_trace()) ++ stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } + EXPORT_SYMBOL_GPL(stop_critical_timings); + NOKPROBE_SYMBOL(stop_critical_timings); +@@ -613,19 +607,15 @@ static void irqsoff_tracer_stop(struct t + */ + void tracer_hardirqs_on(unsigned long a0, unsigned long a1) + { +- unsigned int pc = preempt_count(); +- +- if (!preempt_trace(pc) && irq_trace()) +- stop_critical_timing(a0, a1, pc); ++ if (!preempt_trace(preempt_count()) && irq_trace()) ++ stop_critical_timing(a0, a1); + } + NOKPROBE_SYMBOL(tracer_hardirqs_on); + + void tracer_hardirqs_off(unsigned long a0, unsigned long a1) + { +- unsigned int pc = preempt_count(); +- +- if (!preempt_trace(pc) && irq_trace()) +- start_critical_timing(a0, a1, pc); ++ if (!preempt_trace(preempt_count()) && irq_trace()) ++ start_critical_timing(a0, a1); + } + NOKPROBE_SYMBOL(tracer_hardirqs_off); + +@@ -665,18 +655,14 @@ static struct tracer irqsoff_tracer __re + #ifdef CONFIG_PREEMPT_TRACER + void tracer_preempt_on(unsigned long a0, unsigned long a1) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) && !irq_trace()) +- stop_critical_timing(a0, a1, pc); ++ if (preempt_trace(preempt_count()) && !irq_trace()) ++ stop_critical_timing(a0, a1); + } + + void tracer_preempt_off(unsigned long a0, unsigned long a1) + { +- int pc = preempt_count(); +- +- if (preempt_trace(pc) && !irq_trace()) +- start_critical_timing(a0, a1, pc); ++ if (preempt_trace(preempt_count()) && !irq_trace()) ++ start_critical_timing(a0, a1); + } + + static int preemptoff_tracer_init(struct trace_array *tr) +--- a/kernel/trace/trace_kprobe.c ++++ b/kernel/trace/trace_kprobe.c +@@ -1386,8 +1386,7 @@ static nokprobe_inline void + if (trace_trigger_soft_disabled(trace_file)) + return; + +- local_save_flags(fbuffer.flags); +- fbuffer.pc = preempt_count(); ++ fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = trace_file; + + dsize = __get_data_size(&tk->tp, regs); +@@ -1396,7 +1395,7 @@ static nokprobe_inline void + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, +- fbuffer.flags, fbuffer.pc); ++ fbuffer.trace_ctx); + if (!fbuffer.event) + return; + +@@ -1434,8 +1433,7 @@ static nokprobe_inline void + if (trace_trigger_soft_disabled(trace_file)) + return; + +- local_save_flags(fbuffer.flags); +- fbuffer.pc = preempt_count(); ++ fbuffer.trace_ctx = tracing_gen_ctx(); + fbuffer.trace_file = trace_file; + + dsize = __get_data_size(&tk->tp, regs); +@@ -1443,7 +1441,7 @@ static nokprobe_inline void + trace_event_buffer_lock_reserve(&fbuffer.buffer, trace_file, + call->event.type, + sizeof(*entry) + tk->tp.size + dsize, +- fbuffer.flags, fbuffer.pc); ++ fbuffer.trace_ctx); + if (!fbuffer.event) + return; + +--- 
a/kernel/trace/trace_mmiotrace.c ++++ b/kernel/trace/trace_mmiotrace.c +@@ -300,10 +300,11 @@ static void __trace_mmiotrace_rw(struct + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct trace_mmiotrace_rw *entry; +- int pc = preempt_count(); ++ unsigned int trace_ctx; + ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_RW, +- sizeof(*entry), 0, pc); ++ sizeof(*entry), trace_ctx); + if (!event) { + atomic_inc(&dropped_count); + return; +@@ -312,7 +313,7 @@ static void __trace_mmiotrace_rw(struct + entry->rw = *rw; + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + void mmio_trace_rw(struct mmiotrace_rw *rw) +@@ -330,10 +331,11 @@ static void __trace_mmiotrace_map(struct + struct trace_buffer *buffer = tr->array_buffer.buffer; + struct ring_buffer_event *event; + struct trace_mmiotrace_map *entry; +- int pc = preempt_count(); ++ unsigned int trace_ctx; + ++ trace_ctx = tracing_gen_ctx_flags(0); + event = trace_buffer_lock_reserve(buffer, TRACE_MMIO_MAP, +- sizeof(*entry), 0, pc); ++ sizeof(*entry), trace_ctx); + if (!event) { + atomic_inc(&dropped_count); + return; +@@ -342,7 +344,7 @@ static void __trace_mmiotrace_map(struct + entry->map = *map; + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, 0, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + void mmio_trace_mapping(struct mmiotrace_map *map) +--- a/kernel/trace/trace_sched_wakeup.c ++++ b/kernel/trace/trace_sched_wakeup.c +@@ -67,7 +67,7 @@ static bool function_enabled; + static int + func_prolog_preempt_disable(struct trace_array *tr, + struct trace_array_cpu **data, +- int *pc) ++ unsigned int *trace_ctx) + { + long disabled; + int cpu; +@@ -75,7 +75,7 @@ func_prolog_preempt_disable(struct trace + if (likely(!wakeup_task)) + return 0; + +- *pc = preempt_count(); ++ *trace_ctx = tracing_gen_ctx(); + preempt_disable_notrace(); + + cpu = raw_smp_processor_id(); +@@ -116,8 +116,8 @@ static int wakeup_graph_entry(struct ftr + { + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; +- unsigned long flags; +- int pc, ret = 0; ++ unsigned int trace_ctx; ++ int ret = 0; + + if (ftrace_graph_ignore_func(trace)) + return 0; +@@ -131,11 +131,10 @@ static int wakeup_graph_entry(struct ftr + if (ftrace_graph_notrace_addr(trace->func)) + return 1; + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return 0; + +- local_save_flags(flags); +- ret = __trace_graph_entry(tr, trace, flags, pc); ++ ret = __trace_graph_entry(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + preempt_enable_notrace(); + +@@ -146,16 +145,14 @@ static void wakeup_graph_return(struct f + { + struct trace_array *tr = wakeup_trace; + struct trace_array_cpu *data; +- unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + + ftrace_graph_addr_finish(trace); + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return; + +- local_save_flags(flags); +- __trace_graph_return(tr, trace, flags, pc); ++ __trace_graph_return(tr, trace, trace_ctx); + atomic_dec(&data->disabled); + + preempt_enable_notrace(); +@@ -217,13 +214,13 @@ wakeup_tracer_call(unsigned long ip, uns + struct trace_array *tr = wakeup_trace; + struct 
trace_array_cpu *data; + unsigned long flags; +- int pc; ++ unsigned int trace_ctx; + +- if (!func_prolog_preempt_disable(tr, &data, &pc)) ++ if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) + return; + + local_irq_save(flags); +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + local_irq_restore(flags); + + atomic_dec(&data->disabled); +@@ -303,12 +300,12 @@ static void wakeup_print_header(struct s + static void + __trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + if (is_graph(tr)) +- trace_graph_function(tr, ip, parent_ip, flags, pc); ++ trace_graph_function(tr, ip, parent_ip, trace_ctx); + else +- trace_function(tr, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, trace_ctx); + } + + static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set) +@@ -375,7 +372,7 @@ static void + tracing_sched_switch_trace(struct trace_array *tr, + struct task_struct *prev, + struct task_struct *next, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_context_switch; + struct trace_buffer *buffer = tr->array_buffer.buffer; +@@ -383,7 +380,7 @@ tracing_sched_switch_trace(struct trace_ + struct ctx_switch_entry *entry; + + event = trace_buffer_lock_reserve(buffer, TRACE_CTX, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -396,14 +393,14 @@ tracing_sched_switch_trace(struct trace_ + entry->next_cpu = task_cpu(next); + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, flags, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + static void + tracing_sched_wakeup_trace(struct trace_array *tr, + struct task_struct *wakee, + struct task_struct *curr, +- unsigned long flags, int pc) ++ unsigned int trace_ctx) + { + struct trace_event_call *call = &event_wakeup; + struct ring_buffer_event *event; +@@ -411,7 +408,7 @@ tracing_sched_wakeup_trace(struct trace_ + struct trace_buffer *buffer = tr->array_buffer.buffer; + + event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, +- sizeof(*entry), flags, pc); ++ sizeof(*entry), trace_ctx); + if (!event) + return; + entry = ring_buffer_event_data(event); +@@ -424,7 +421,7 @@ tracing_sched_wakeup_trace(struct trace_ + entry->next_cpu = task_cpu(wakee); + + if (!call_filter_check_discard(call, entry, buffer, event)) +- trace_buffer_unlock_commit(tr, buffer, event, flags, pc); ++ trace_buffer_unlock_commit(tr, buffer, event, trace_ctx); + } + + static void notrace +@@ -436,7 +433,7 @@ probe_wakeup_sched_switch(void *ignore, + unsigned long flags; + long disabled; + int cpu; +- int pc; ++ unsigned int trace_ctx; + + tracing_record_cmdline(prev); + +@@ -455,8 +452,6 @@ probe_wakeup_sched_switch(void *ignore, + if (next != wakeup_task) + return; + +- pc = preempt_count(); +- + /* disable local data, not wakeup_cpu data */ + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); +@@ -464,6 +459,8 @@ probe_wakeup_sched_switch(void *ignore, + goto out; + + local_irq_save(flags); ++ trace_ctx = tracing_gen_ctx_flags(flags); ++ + arch_spin_lock(&wakeup_lock); + + /* We could race with grabbing wakeup_lock */ +@@ -473,9 +470,9 @@ probe_wakeup_sched_switch(void *ignore, + /* The task we are waiting for is waking up */ + data 
= per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); + +- __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); +- tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); +- __trace_stack(wakeup_trace, flags, 0, pc); ++ __trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, trace_ctx); ++ tracing_sched_switch_trace(wakeup_trace, prev, next, trace_ctx); ++ __trace_stack(wakeup_trace, trace_ctx, 0); + + T0 = data->preempt_timestamp; + T1 = ftrace_now(cpu); +@@ -527,9 +524,8 @@ probe_wakeup(void *ignore, struct task_s + { + struct trace_array_cpu *data; + int cpu = smp_processor_id(); +- unsigned long flags; + long disabled; +- int pc; ++ unsigned int trace_ctx; + + if (likely(!tracer_enabled)) + return; +@@ -550,11 +546,12 @@ probe_wakeup(void *ignore, struct task_s + (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio))) + return; + +- pc = preempt_count(); + disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->array_buffer.data, cpu)->disabled); + if (unlikely(disabled != 1)) + goto out; + ++ trace_ctx = tracing_gen_ctx(); ++ + /* interrupts should be off from try_to_wake_up */ + arch_spin_lock(&wakeup_lock); + +@@ -581,19 +578,17 @@ probe_wakeup(void *ignore, struct task_s + + wakeup_task = get_task_struct(p); + +- local_save_flags(flags); +- + data = per_cpu_ptr(wakeup_trace->array_buffer.data, wakeup_cpu); + data->preempt_timestamp = ftrace_now(cpu); +- tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); +- __trace_stack(wakeup_trace, flags, 0, pc); ++ tracing_sched_wakeup_trace(wakeup_trace, p, current, trace_ctx); ++ __trace_stack(wakeup_trace, trace_ctx, 0); + + /* + * We must be careful in using CALLER_ADDR2. But since wake_up + * is not called by an assembly function (where as schedule is) + * it should be safe to use it here. 
+ */ +- __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); ++ __trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, trace_ctx); + + out_locked: + arch_spin_unlock(&wakeup_lock); +--- a/kernel/trace/trace_syscalls.c ++++ b/kernel/trace/trace_syscalls.c +@@ -298,9 +298,8 @@ static void ftrace_syscall_enter(void *d + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + struct trace_buffer *buffer; +- unsigned long irq_flags; ++ unsigned int trace_ctx; + unsigned long args[6]; +- int pc; + int syscall_nr; + int size; + +@@ -322,12 +321,11 @@ static void ftrace_syscall_enter(void *d + + size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + +- local_save_flags(irq_flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, +- sys_data->enter_event->event.type, size, irq_flags, pc); ++ sys_data->enter_event->event.type, size, trace_ctx); + if (!event) + return; + +@@ -337,7 +335,7 @@ static void ftrace_syscall_enter(void *d + memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args); + + event_trigger_unlock_commit(trace_file, buffer, event, entry, +- irq_flags, pc); ++ trace_ctx); + } + + static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) +@@ -348,8 +346,7 @@ static void ftrace_syscall_exit(void *da + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + struct trace_buffer *buffer; +- unsigned long irq_flags; +- int pc; ++ unsigned int trace_ctx; + int syscall_nr; + + syscall_nr = trace_get_syscall_nr(current, regs); +@@ -368,13 +365,12 @@ static void ftrace_syscall_exit(void *da + if (!sys_data) + return; + +- local_save_flags(irq_flags); +- pc = preempt_count(); ++ trace_ctx = tracing_gen_ctx(); + + buffer = tr->array_buffer.buffer; + event = trace_buffer_lock_reserve(buffer, + sys_data->exit_event->event.type, sizeof(*entry), +- irq_flags, pc); ++ trace_ctx); + if (!event) + return; + +@@ -383,7 +379,7 @@ static void ftrace_syscall_exit(void *da + entry->ret = syscall_get_return_value(current, regs); + + event_trigger_unlock_commit(trace_file, buffer, event, entry, +- irq_flags, pc); ++ trace_ctx); + } + + static int reg_event_syscall_enter(struct trace_event_file *file, +--- a/kernel/trace/trace_uprobe.c ++++ b/kernel/trace/trace_uprobe.c +@@ -961,7 +961,7 @@ static void __uprobe_trace_func(struct t + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); + size = esize + tu->tp.size + dsize; + event = trace_event_buffer_lock_reserve(&buffer, trace_file, +- call->event.type, size, 0, 0); ++ call->event.type, size, 0); + if (!event) + return; + +@@ -977,7 +977,7 @@ static void __uprobe_trace_func(struct t + + memcpy(data, ucb->buf, tu->tp.size + dsize); + +- event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); ++ event_trigger_unlock_commit(trace_file, buffer, event, entry, 0); + } + + /* uprobe handler */ diff --git a/kernel/patches-5.11.x-rt/0016-0002-tracing-Inline-tracing_gen_ctx_flags.patch b/kernel/patches-5.11.x-rt/0016-0002-tracing-Inline-tracing_gen_ctx_flags.patch new file mode 100644 index 000000000..a178c6cab --- /dev/null +++ b/kernel/patches-5.11.x-rt/0016-0002-tracing-Inline-tracing_gen_ctx_flags.patch @@ -0,0 +1,173 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 25 Jan 2021 20:45:09 +0100 +Subject: [PATCH 2/4] tracing: Inline tracing_gen_ctx_flags() + +Inline tracing_gen_ctx_flags(). This allows having a single ifdef +CONFIG_TRACE_IRQFLAGS_SUPPORT.
+ +This requires to move `trace_flag_type' so tracing_gen_ctx_flags() can +use it. + +Link: https://lkml.kernel.org/r/20210125194511.3924915-3-bigeasy@linutronix.de + +Suggested-by: Steven Rostedt +Link: https://lkml.kernel.org/r/20210125140323.6b1ff20c@gandalf.local.home +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/trace_events.h | 54 ++++++++++++++++++++++++++++++++++++++++--- + kernel/trace/trace.c | 38 +----------------------------- + kernel/trace/trace.h | 19 --------------- + 3 files changed, 53 insertions(+), 58 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -160,9 +160,57 @@ static inline void tracing_generic_entry + entry->flags = trace_ctx >> 16; + } + +-unsigned int tracing_gen_ctx_flags(unsigned long irqflags); +-unsigned int tracing_gen_ctx(void); +-unsigned int tracing_gen_ctx_dec(void); ++unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); ++ ++enum trace_flag_type { ++ TRACE_FLAG_IRQS_OFF = 0x01, ++ TRACE_FLAG_IRQS_NOSUPPORT = 0x02, ++ TRACE_FLAG_NEED_RESCHED = 0x04, ++ TRACE_FLAG_HARDIRQ = 0x08, ++ TRACE_FLAG_SOFTIRQ = 0x10, ++ TRACE_FLAG_PREEMPT_RESCHED = 0x20, ++ TRACE_FLAG_NMI = 0x40, ++}; ++ ++#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT ++static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) ++{ ++ unsigned int irq_status = irqs_disabled_flags(irqflags) ? ++ TRACE_FLAG_IRQS_OFF : 0; ++ return tracing_gen_ctx_irq_test(irq_status); ++} ++static inline unsigned int tracing_gen_ctx(void) ++{ ++ unsigned long irqflags; ++ ++ local_save_flags(irqflags); ++ return tracing_gen_ctx_flags(irqflags); ++} ++#else ++ ++static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags) ++{ ++ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++} ++static inline unsigned int tracing_gen_ctx(void) ++{ ++ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT); ++} ++#endif ++ ++static inline unsigned int tracing_gen_ctx_dec(void) ++{ ++ unsigned int trace_ctx; ++ ++ trace_ctx = tracing_gen_ctx(); ++ /* ++ * Subtract one from the preeption counter if preemption is enabled, ++ * see trace_event_buffer_reserve()for details. 
++ */ ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ trace_ctx--; ++ return trace_ctx; ++} + + struct trace_event_file; + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2578,20 +2578,13 @@ enum print_line_t trace_handle_return(st + } + EXPORT_SYMBOL_GPL(trace_handle_return); + +-unsigned int tracing_gen_ctx_flags(unsigned long irqflags) ++unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + { +- unsigned int trace_flags = 0; ++ unsigned int trace_flags = irqs_status; + unsigned int pc; + + pc = preempt_count(); + +-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +- if (irqs_disabled_flags(irqflags)) +- trace_flags |= TRACE_FLAG_IRQS_OFF; +-#else +- trace_flags |= TRACE_FLAG_IRQS_NOSUPPORT; +-#endif +- + if (pc & NMI_MASK) + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) +@@ -2607,33 +2600,6 @@ unsigned int tracing_gen_ctx_flags(unsig + return (trace_flags << 16) | (pc & 0xff); + } + +-unsigned int tracing_gen_ctx(void) +-{ +- unsigned long irqflags; +- +-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +- local_save_flags(irqflags); +-#else +- irqflags = 0; +-#endif +- return tracing_gen_ctx_flags(irqflags); +-} +- +-unsigned int tracing_gen_ctx_dec(void) +-{ +- unsigned int trace_ctx; +- +- trace_ctx = tracing_gen_ctx(); +- +- /* +- * Subtract one from the preeption counter if preemption is enabled, +- * see trace_event_buffer_reserve()for details. +- */ +- if (IS_ENABLED(CONFIG_PREEMPTION)) +- trace_ctx--; +- return trace_ctx; +-} +- + struct ring_buffer_event * + trace_buffer_lock_reserve(struct trace_buffer *buffer, + int type, +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head { + unsigned long ret_ip; + }; + +-/* +- * trace_flag_type is an enumeration that holds different +- * states when a trace occurs. These are: +- * IRQS_OFF - interrupts were disabled +- * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags +- * NEED_RESCHED - reschedule is requested +- * HARDIRQ - inside an interrupt handler +- * SOFTIRQ - inside a softirq handler +- */ +-enum trace_flag_type { +- TRACE_FLAG_IRQS_OFF = 0x01, +- TRACE_FLAG_IRQS_NOSUPPORT = 0x02, +- TRACE_FLAG_NEED_RESCHED = 0x04, +- TRACE_FLAG_HARDIRQ = 0x08, +- TRACE_FLAG_SOFTIRQ = 0x10, +- TRACE_FLAG_PREEMPT_RESCHED = 0x20, +- TRACE_FLAG_NMI = 0x40, +-}; +- + #define TRACE_BUF_SIZE 1024 + + struct trace_array; diff --git a/kernel/patches-5.11.x-rt/0017-0003-tracing-Use-in_serving_softirq-to-deduct-softirq-sta.patch b/kernel/patches-5.11.x-rt/0017-0003-tracing-Use-in_serving_softirq-to-deduct-softirq-sta.patch new file mode 100644 index 000000000..8bdd06884 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0017-0003-tracing-Use-in_serving_softirq-to-deduct-softirq-sta.patch @@ -0,0 +1,41 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 25 Jan 2021 20:45:10 +0100 +Subject: [PATCH 3/4] tracing: Use in_serving_softirq() to deduct softirq + status. + +PREEMPT_RT does not report "serving softirq" because the tracing core +looks at the preemption counter while PREEMPT_RT does not update it +while processing softirqs in order to remain preemptible. The +information is stored somewhere else. +The in_serving_softirq() macro and the SOFTIRQ_OFFSET define are still +working but not on the preempt-counter. + +Use in_serving_softirq() macro which works on PREEMPT_RT. On !PREEMPT_RT +the compiler (gcc-10 / clang-11) is smart enough to optimize the +in_serving_softirq() related read of the preemption counter away. 
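+
+(Editor's sketch, not part of the upstream patch: on !PREEMPT_RT the
+preemption counter multiplexes several fields and the tracer derives
+its context flags by testing the corresponding masks; on PREEMPT_RT the
+softirq field is simply never set. The DEMO_* mask layout below is
+illustrative of the include/linux/preempt.h conventions, not
+authoritative.)
+
+  /* Illustrative field layout of a preempt_count-style word. */
+  #define DEMO_PREEMPT_MASK  0x000000ffU  /* preemption disable depth */
+  #define DEMO_SOFTIRQ_MASK  0x0000ff00U  /* softirq state (unreliable on RT) */
+  #define DEMO_HARDIRQ_MASK  0x000f0000U  /* hardirq nesting */
+
+  unsigned int demo_flags(unsigned int pc, int serving_softirq)
+  {
+          unsigned int flags = 0;
+
+          if (pc & DEMO_HARDIRQ_MASK)
+                  flags |= 0x08;          /* TRACE_FLAG_HARDIRQ */
+          /*
+           * On PREEMPT_RT the counter says nothing about softirqs, so
+           * the answer must come from in_serving_softirq() instead of
+           * the DEMO_SOFTIRQ_MASK bits.
+           */
+          if (serving_softirq)
+                  flags |= 0x10;          /* TRACE_FLAG_SOFTIRQ */
+          return flags;
+  }
+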
+The only difference I noticed by using in_serving_softirq() on +!PREEMPT_RT is that gcc-10 implemented tracing_gen_ctx_flags() as +reading FLAG, jmp _tracing_gen_ctx_flags(). Without in_serving_softirq() +it inlined _tracing_gen_ctx_flags() into tracing_gen_ctx_flags(). + +Link: https://lkml.kernel.org/r/20210125194511.3924915-4-bigeasy@linutronix.de + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/trace/trace.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2589,8 +2589,7 @@ unsigned int tracing_gen_ctx_irq_test(un + trace_flags |= TRACE_FLAG_NMI; + if (pc & HARDIRQ_MASK) + trace_flags |= TRACE_FLAG_HARDIRQ; +- +- if (pc & SOFTIRQ_OFFSET) ++ if (in_serving_softirq()) + trace_flags |= TRACE_FLAG_SOFTIRQ; + + if (tif_need_resched()) diff --git a/kernel/patches-5.11.x-rt/0018-0004-tracing-Remove-NULL-check-from-current-in-tracing_ge.patch b/kernel/patches-5.11.x-rt/0018-0004-tracing-Remove-NULL-check-from-current-in-tracing_ge.patch new file mode 100644 index 000000000..6ad166ebc --- /dev/null +++ b/kernel/patches-5.11.x-rt/0018-0004-tracing-Remove-NULL-check-from-current-in-tracing_ge.patch @@ -0,0 +1,36 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 25 Jan 2021 20:45:11 +0100 +Subject: [PATCH 4/4] tracing: Remove NULL check from current in + tracing_generic_entry_update(). + +I can't imagine when or why `current' would return a NULL pointer. This +check was added in commit + 72829bc3d63cd ("ftrace: move enums to ftrace.h and make helper function global") + +but it doesn't give me hint why it was needed. + +Assume `current' never returns a NULL pointer and remove the check. + +Link: https://lkml.kernel.org/r/20210125194511.3924915-5-bigeasy@linutronix.de + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Steven Rostedt (VMware) +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/trace_events.h | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -152,10 +152,8 @@ static inline void tracing_generic_entry + unsigned short type, + unsigned int trace_ctx) + { +- struct task_struct *tsk = current; +- + entry->preempt_count = trace_ctx & 0xff; +- entry->pid = (tsk) ? tsk->pid : 0; ++ entry->pid = current->pid; + entry->type = type; + entry->flags = trace_ctx >> 16; + } diff --git a/kernel/patches-5.4.x-rt/0083-tpm-remove-tpm_dev_wq_lock.patch b/kernel/patches-5.11.x-rt/0019-tpm-remove-tpm_dev_wq_lock.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0083-tpm-remove-tpm_dev_wq_lock.patch rename to kernel/patches-5.11.x-rt/0019-tpm-remove-tpm_dev_wq_lock.patch diff --git a/kernel/patches-5.11.x-rt/0020-powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch b/kernel/patches-5.11.x-rt/0020-powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch new file mode 100644 index 000000000..80f237522 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0020-powerpc-mm-Move-the-linear_mapping_mutex-to-the-ifde.patch @@ -0,0 +1,37 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 19 Feb 2021 17:51:07 +0100 +Subject: [PATCH] powerpc/mm: Move the linear_mapping_mutex to the ifdef where + it is used + +The mutex linear_mapping_mutex is defined at the of the file while its +only two user are within the CONFIG_MEMORY_HOTPLUG block. 
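+
+(Editor's note: the general pattern, shown as a minimal kernel-style
+sketch with a hypothetical CONFIG_FOO and foo_lock, not part of the
+upstream patch. A static object referenced only under a config option
+should be defined under the same #ifdef; otherwise configurations
+without that option are left with a defined-but-unused variable, which,
+as the next paragraph explains, the PREEMPT_RT mutex implementation
+turns into a build failure.)
+
+  #ifdef CONFIG_FOO
+  /* Only defined when its sole users below are compiled in. */
+  static DEFINE_MUTEX(foo_lock);
+
+  static void foo_op(void)
+  {
+          mutex_lock(&foo_lock);
+          /* ... work that only exists under CONFIG_FOO ... */
+          mutex_unlock(&foo_lock);
+  }
+  #endif /* CONFIG_FOO */
+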
+A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because +its mutex implementation is smart enough to realize that it is unused. + +Move the definition of linear_mapping_mutex to ifdef block where it is +used. + +Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()") +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/mm/mem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/powerpc/mm/mem.c ++++ b/arch/powerpc/mm/mem.c +@@ -54,7 +54,6 @@ + + #include + +-static DEFINE_MUTEX(linear_mapping_mutex); + unsigned long long memory_limit; + bool init_mem_is_free; + +@@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct fil + EXPORT_SYMBOL(phys_mem_access_prot); + + #ifdef CONFIG_MEMORY_HOTPLUG ++static DEFINE_MUTEX(linear_mapping_mutex); + + #ifdef CONFIG_NUMA + int memory_add_physaddr_to_nid(u64 start) diff --git a/kernel/patches-5.11.x-rt/0021-0002-printk-limit-second-loop-of-syslog_print_all.patch b/kernel/patches-5.11.x-rt/0021-0002-printk-limit-second-loop-of-syslog_print_all.patch new file mode 100644 index 000000000..af0b0190c --- /dev/null +++ b/kernel/patches-5.11.x-rt/0021-0002-printk-limit-second-loop-of-syslog_print_all.patch @@ -0,0 +1,49 @@ +From: John Ogness +Date: Wed, 17 Feb 2021 16:15:31 +0100 +Subject: [PATCH 02/28] printk: limit second loop of syslog_print_all + +The second loop of syslog_print_all() subtracts lengths that were +added in the first loop. With commit b031a684bfd0 ("printk: remove +logbuf_lock writer-protection of ringbuffer") it is possible that +records are (over)written during syslog_print_all(). This allows the +possibility of the second loop subtracting lengths that were never +added in the first loop. + +This situation can result in syslog_print_all() filling the buffer +starting from a later record, even though there may have been room +to fit the earlier record(s) as well. + +Fixes: b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer") +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +--- + kernel/printk/printk.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1494,6 +1494,7 @@ static int syslog_print_all(char __user + struct printk_info info; + unsigned int line_count; + struct printk_record r; ++ u64 max_seq; + char *text; + int len = 0; + u64 seq; +@@ -1512,9 +1513,15 @@ static int syslog_print_all(char __user + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) + len += get_record_print_text_size(&info, line_count, true, time); + ++ /* ++ * Set an upper bound for the next loop to avoid subtracting lengths ++ * that were never added. 
++ */ ++ max_seq = seq; ++ + /* move first record forward until length fits into the buffer */ + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { +- if (len <= size) ++ if (len <= size || info.seq >= max_seq) + break; + len -= get_record_print_text_size(&info, line_count, true, time); + } diff --git a/kernel/patches-5.11.x-rt/0022-0003-printk-kmsg_dump-remove-unused-fields.patch b/kernel/patches-5.11.x-rt/0022-0003-printk-kmsg_dump-remove-unused-fields.patch new file mode 100644 index 000000000..1139b97b6 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0022-0003-printk-kmsg_dump-remove-unused-fields.patch @@ -0,0 +1,36 @@ +From: John Ogness +Date: Mon, 21 Dec 2020 11:19:39 +0106 +Subject: [PATCH 03/28] printk: kmsg_dump: remove unused fields + +struct kmsg_dumper still contains some fields that were used to +iterate the old ringbuffer. They are no longer used. Remove them +and update the struct documentation. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/kmsg_dump.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/include/linux/kmsg_dump.h ++++ b/include/linux/kmsg_dump.h +@@ -36,6 +36,9 @@ enum kmsg_dump_reason { + * through the record iterator + * @max_reason: filter for highest reason number that should be dumped + * @registered: Flag that specifies if this is already registered ++ * @active: Flag that specifies if this is currently dumping ++ * @cur_seq: Points to the oldest message to dump (private) ++ * @next_seq: Points after the newest message to dump (private) + */ + struct kmsg_dumper { + struct list_head list; +@@ -45,8 +48,6 @@ struct kmsg_dumper { + bool registered; + + /* private state of the kmsg iterator */ +- u32 cur_idx; +- u32 next_idx; + u64 cur_seq; + u64 next_seq; + }; diff --git a/kernel/patches-5.11.x-rt/0023-0004-printk-refactor-kmsg_dump_get_buffer.patch b/kernel/patches-5.11.x-rt/0023-0004-printk-refactor-kmsg_dump_get_buffer.patch new file mode 100644 index 000000000..6e99e8108 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0023-0004-printk-refactor-kmsg_dump_get_buffer.patch @@ -0,0 +1,136 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:41:56 +0106 +Subject: [PATCH 04/28] printk: refactor kmsg_dump_get_buffer() + +kmsg_dump_get_buffer() requires nearly the same logic as +syslog_print_all(), but uses different variable names and +does not make use of the ringbuffer loop macros. Modify +kmsg_dump_get_buffer() so that the implementation is as similar +to syslog_print_all() as possible. + +A follow-up commit will move this common logic into a +separate helper function. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/kmsg_dump.h | 2 - + kernel/printk/printk.c | 60 ++++++++++++++++++++++++---------------------- + 2 files changed, 33 insertions(+), 29 deletions(-) + +--- a/include/linux/kmsg_dump.h ++++ b/include/linux/kmsg_dump.h +@@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dump + char *line, size_t size, size_t *len); + + bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +- char *buf, size_t size, size_t *len); ++ char *buf, size_t size, size_t *len_out); + + void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3424,7 +3424,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + * read. 
+ */ + bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, +- char *buf, size_t size, size_t *len) ++ char *buf, size_t size, size_t *len_out) + { + struct printk_info info; + unsigned int line_count; +@@ -3432,12 +3432,10 @@ bool kmsg_dump_get_buffer(struct kmsg_du + unsigned long flags; + u64 seq; + u64 next_seq; +- size_t l = 0; ++ size_t len = 0; + bool ret = false; + bool time = printk_time; + +- prb_rec_init_rd(&r, &info, buf, size); +- + if (!dumper->active || !buf || !size) + goto out; + +@@ -3455,48 +3453,54 @@ bool kmsg_dump_get_buffer(struct kmsg_du + goto out; + } + +- /* calculate length of entire buffer */ +- seq = dumper->cur_seq; +- while (prb_read_valid_info(prb, seq, &info, &line_count)) { +- if (r.info->seq >= dumper->next_seq) ++ /* ++ * Find first record that fits, including all following records, ++ * into the user-provided buffer for this dump. ++ */ ++ ++ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { ++ if (info.seq >= dumper->next_seq) + break; +- l += get_record_print_text_size(&info, line_count, syslog, time); +- seq = r.info->seq + 1; ++ len += get_record_print_text_size(&info, line_count, syslog, time); + } + +- /* move first record forward until length fits into the buffer */ +- seq = dumper->cur_seq; +- while (l >= size && prb_read_valid_info(prb, seq, +- &info, &line_count)) { +- if (r.info->seq >= dumper->next_seq) ++ /* ++ * Move first record forward until length fits into the buffer. Ignore ++ * newest messages that were not counted in the above cycle. Messages ++ * might appear and get lost in the meantime. This is the best effort ++ * that prevents an infinite loop. ++ */ ++ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { ++ if (len < size || info.seq >= dumper->next_seq) + break; +- l -= get_record_print_text_size(&info, line_count, syslog, time); +- seq = r.info->seq + 1; ++ len -= get_record_print_text_size(&info, line_count, syslog, time); + } + +- /* last message in next interation */ ++ /* ++ * Next kmsg_dump_get_buffer() invocation will dump block of ++ * older records stored right before this one. ++ */ + next_seq = seq; + +- /* actually read text into the buffer now */ +- l = 0; +- while (prb_read_valid(prb, seq, &r)) { ++ prb_rec_init_rd(&r, &info, buf, size); ++ ++ len = 0; ++ prb_for_each_record(seq, prb, seq, &r) { + if (r.info->seq >= dumper->next_seq) + break; + +- l += record_print_text(&r, syslog, time); +- +- /* adjust record to store to remaining buffer space */ +- prb_rec_init_rd(&r, &info, buf + l, size - l); ++ len += record_print_text(&r, syslog, time); + +- seq = r.info->seq + 1; ++ /* Adjust record to store to remaining buffer space. 
*/ ++ prb_rec_init_rd(&r, &info, buf + len, size - len); + } + + dumper->next_seq = next_seq; + ret = true; + logbuf_unlock_irqrestore(flags); + out: +- if (len) +- *len = l; ++ if (len_out) ++ *len_out = len; + return ret; + } + EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); diff --git a/kernel/patches-5.11.x-rt/0024-0005-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch b/kernel/patches-5.11.x-rt/0024-0005-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch new file mode 100644 index 000000000..4a64c2fe1 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0024-0005-printk-consolidate-kmsg_dump_get_buffer-syslog_print.patch @@ -0,0 +1,140 @@ +From: John Ogness +Date: Wed, 13 Jan 2021 11:29:53 +0106 +Subject: [PATCH 05/28] printk: consolidate + kmsg_dump_get_buffer/syslog_print_all code + +The logic for finding records to fit into a buffer is the same for +kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper +function find_first_fitting_seq() to handle this logic. + +Signed-off-by: John Ogness +--- + kernel/printk/printk.c | 87 ++++++++++++++++++++++++++++--------------------- + 1 file changed, 50 insertions(+), 37 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1421,6 +1421,50 @@ static size_t get_record_print_text_size + return ((prefix_len * line_count) + info->text_len + 1); + } + ++/* ++ * Beginning with @start_seq, find the first record where it and all following ++ * records up to (but not including) @max_seq fit into @size. ++ * ++ * @max_seq is simply an upper bound and does not need to exist. If the caller ++ * does not require an upper bound, -1 can be used for @max_seq. ++ */ ++static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, ++ bool syslog, bool time) ++{ ++ struct printk_info info; ++ unsigned int line_count; ++ size_t len = 0; ++ u64 seq; ++ ++ /* Determine the size of the records up to @max_seq. */ ++ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { ++ if (info.seq >= max_seq) ++ break; ++ len += get_record_print_text_size(&info, line_count, syslog, time); ++ } ++ ++ /* ++ * Adjust the upper bound for the next loop to avoid subtracting ++ * lengths that were never added. ++ */ ++ if (seq < max_seq) ++ max_seq = seq; ++ ++ /* ++ * Move first record forward until length fits into the buffer. Ignore ++ * newest messages that were not counted in the above cycle. Messages ++ * might appear and get lost in the meantime. This is a best effort ++ * that prevents an infinite loop that could occur with a retry. ++ */ ++ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { ++ if (len <= size || info.seq >= max_seq) ++ break; ++ len -= get_record_print_text_size(&info, line_count, syslog, time); ++ } ++ ++ return seq; ++} ++ + static int syslog_print(char __user *buf, int size) + { + struct printk_info info; +@@ -1492,9 +1536,7 @@ static int syslog_print(char __user *buf + static int syslog_print_all(char __user *buf, int size, bool clear) + { + struct printk_info info; +- unsigned int line_count; + struct printk_record r; +- u64 max_seq; + char *text; + int len = 0; + u64 seq; +@@ -1510,21 +1552,7 @@ static int syslog_print_all(char __user + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. + */ +- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) +- len += get_record_print_text_size(&info, line_count, true, time); +- +- /* +- * Set an upper bound for the next loop to avoid subtracting lengths +- * that were never added. 
+- */ +- max_seq = seq; +- +- /* move first record forward until length fits into the buffer */ +- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { +- if (len <= size || info.seq >= max_seq) +- break; +- len -= get_record_print_text_size(&info, line_count, true, time); +- } ++ seq = find_first_fitting_seq(clear_seq, -1, size, true, time); + + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); + +@@ -3427,7 +3455,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du + char *buf, size_t size, size_t *len_out) + { + struct printk_info info; +- unsigned int line_count; + struct printk_record r; + unsigned long flags; + u64 seq; +@@ -3455,26 +3482,12 @@ bool kmsg_dump_get_buffer(struct kmsg_du + + /* + * Find first record that fits, including all following records, +- * into the user-provided buffer for this dump. ++ * into the user-provided buffer for this dump. Pass in size-1 ++ * because this function (by way of record_print_text()) will ++ * not write more than size-1 bytes of text into @buf. + */ +- +- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { +- if (info.seq >= dumper->next_seq) +- break; +- len += get_record_print_text_size(&info, line_count, syslog, time); +- } +- +- /* +- * Move first record forward until length fits into the buffer. Ignore +- * newest messages that were not counted in the above cycle. Messages +- * might appear and get lost in the meantime. This is the best effort +- * that prevents an infinite loop. +- */ +- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) { +- if (len < size || info.seq >= dumper->next_seq) +- break; +- len -= get_record_print_text_size(&info, line_count, syslog, time); +- } ++ seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, ++ size - 1, syslog, time); + + /* + * Next kmsg_dump_get_buffer() invocation will dump block of diff --git a/kernel/patches-5.11.x-rt/0025-0006-printk-introduce-CONSOLE_LOG_MAX-for-improved-multi-.patch b/kernel/patches-5.11.x-rt/0025-0006-printk-introduce-CONSOLE_LOG_MAX-for-improved-multi-.patch new file mode 100644 index 000000000..0e0211496 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0025-0006-printk-introduce-CONSOLE_LOG_MAX-for-improved-multi-.patch @@ -0,0 +1,88 @@ +From: John Ogness +Date: Thu, 10 Dec 2020 12:48:01 +0106 +Subject: [PATCH 06/28] printk: introduce CONSOLE_LOG_MAX for improved + multi-line support + +Instead of using "LOG_LINE_MAX + PREFIX_MAX" for temporary buffer +sizes, introduce CONSOLE_LOG_MAX. This represents the maximum size +that is allowed to be printed to the console for a single record. + +Rather than setting CONSOLE_LOG_MAX to "LOG_LINE_MAX + PREFIX_MAX" +(1024), increase it to 4096. With a larger buffer size, multi-line +records that are nearly LOG_LINE_MAX in length will have a better +chance of being fully printed. (When formatting a record for the +console, each line of a multi-line record is prepended with a copy +of the prefix.) + +Signed-off-by: John Ogness +--- + kernel/printk/printk.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -410,8 +410,13 @@ static u64 clear_seq; + #else + #define PREFIX_MAX 32 + #endif ++ ++/* the maximum size allowed to be reserved for a record */ + #define LOG_LINE_MAX (1024 - PREFIX_MAX) + ++/* the maximum size of a formatted record (i.e. 
with prefix added per line) */ ++#define CONSOLE_LOG_MAX 4096 ++ + #define LOG_LEVEL(v) ((v) & 0x07) + #define LOG_FACILITY(v) ((v) >> 3 & 0xff) + +@@ -1472,11 +1477,11 @@ static int syslog_print(char __user *buf + char *text; + int len = 0; + +- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) + return -ENOMEM; + +- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); ++ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + + while (size > 0) { + size_t n; +@@ -1542,7 +1547,7 @@ static int syslog_print_all(char __user + u64 seq; + bool time; + +- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) + return -ENOMEM; + +@@ -1554,7 +1559,7 @@ static int syslog_print_all(char __user + */ + seq = find_first_fitting_seq(clear_seq, -1, size, true, time); + +- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); ++ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + + len = 0; + prb_for_each_record(seq, prb, seq, &r) { +@@ -2187,8 +2192,7 @@ EXPORT_SYMBOL(printk); + + #else /* CONFIG_PRINTK */ + +-#define LOG_LINE_MAX 0 +-#define PREFIX_MAX 0 ++#define CONSOLE_LOG_MAX 0 + #define printk_time false + + #define prb_read_valid(rb, seq, r) false +@@ -2506,7 +2510,7 @@ static inline int can_use_console(void) + void console_unlock(void) + { + static char ext_text[CONSOLE_EXT_LOG_MAX]; +- static char text[LOG_LINE_MAX + PREFIX_MAX]; ++ static char text[CONSOLE_LOG_MAX]; + unsigned long flags; + bool do_cond_resched, retry; + struct printk_info info; diff --git a/kernel/patches-5.11.x-rt/0026-0007-printk-use-seqcount_latch-for-clear_seq.patch b/kernel/patches-5.11.x-rt/0026-0007-printk-use-seqcount_latch-for-clear_seq.patch new file mode 100644 index 000000000..618e5b2fb --- /dev/null +++ b/kernel/patches-5.11.x-rt/0026-0007-printk-use-seqcount_latch-for-clear_seq.patch @@ -0,0 +1,140 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:41:58 +0106 +Subject: [PATCH 07/28] printk: use seqcount_latch for clear_seq + +kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However, +this is not done atomically. Since @clear_seq is 64-bit, this +cannot be an atomic operation for all platforms. Therefore, use +a seqcount_latch to allow readers to always read a consistent +value. + +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 58 ++++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 50 insertions(+), 8 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -402,8 +402,21 @@ static u64 console_seq; + static u64 exclusive_console_stop_seq; + static unsigned long console_dropped; + +-/* the next printk record to read after the last 'clear' command */ +-static u64 clear_seq; ++struct latched_seq { ++ seqcount_latch_t latch; ++ u64 val[2]; ++}; ++ ++/* ++ * The next printk record to read after the last 'clear' command. There are ++ * two copies (updated with seqcount_latch) so that reads can locklessly ++ * access a valid value. Writers are synchronized by @logbuf_lock. ++ */ ++static struct latched_seq clear_seq = { ++ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), ++ .val[0] = 0, ++ .val[1] = 0, ++}; + + #ifdef CONFIG_PRINTK_CALLER + #define PREFIX_MAX 48 +@@ -457,6 +470,31 @@ bool printk_percpu_data_ready(void) + return __printk_percpu_data_ready; + } + ++/* Must be called under logbuf_lock. 
*/ ++static void latched_seq_write(struct latched_seq *ls, u64 val) ++{ ++ raw_write_seqcount_latch(&ls->latch); ++ ls->val[0] = val; ++ raw_write_seqcount_latch(&ls->latch); ++ ls->val[1] = val; ++} ++ ++/* Can be called from any context. */ ++static u64 latched_seq_read_nolock(struct latched_seq *ls) ++{ ++ unsigned int seq; ++ unsigned int idx; ++ u64 val; ++ ++ do { ++ seq = raw_read_seqcount_latch(&ls->latch); ++ idx = seq & 0x1; ++ val = ls->val[idx]; ++ } while (read_seqcount_latch_retry(&ls->latch, seq)); ++ ++ return val; ++} ++ + /* Return log buffer address */ + char *log_buf_addr_get(void) + { +@@ -801,7 +839,7 @@ static loff_t devkmsg_llseek(struct file + * like issued by 'dmesg -c'. Reading /dev/kmsg itself + * changes no global state, and does not clear anything. + */ +- user->seq = clear_seq; ++ user->seq = latched_seq_read_nolock(&clear_seq); + break; + case SEEK_END: + /* after the last record */ +@@ -960,6 +998,9 @@ void log_buf_vmcoreinfo_setup(void) + + VMCOREINFO_SIZE(atomic_long_t); + VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); ++ ++ VMCOREINFO_STRUCT_SIZE(latched_seq); ++ VMCOREINFO_OFFSET(latched_seq, val); + } + #endif + +@@ -1557,7 +1598,8 @@ static int syslog_print_all(char __user + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. + */ +- seq = find_first_fitting_seq(clear_seq, -1, size, true, time); ++ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1, ++ size, true, time); + + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + +@@ -1584,7 +1626,7 @@ static int syslog_print_all(char __user + } + + if (clear) +- clear_seq = seq; ++ latched_seq_write(&clear_seq, seq); + logbuf_unlock_irq(); + + kfree(text); +@@ -1594,7 +1636,7 @@ static int syslog_print_all(char __user + static void syslog_clear(void) + { + logbuf_lock_irq(); +- clear_seq = prb_next_seq(prb); ++ latched_seq_write(&clear_seq, prb_next_seq(prb)); + logbuf_unlock_irq(); + } + +@@ -3336,7 +3378,7 @@ void kmsg_dump(enum kmsg_dump_reason rea + dumper->active = true; + + logbuf_lock_irqsave(flags); +- dumper->cur_seq = clear_seq; ++ dumper->cur_seq = latched_seq_read_nolock(&clear_seq); + dumper->next_seq = prb_next_seq(prb); + logbuf_unlock_irqrestore(flags); + +@@ -3534,7 +3576,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + */ + void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) + { +- dumper->cur_seq = clear_seq; ++ dumper->cur_seq = latched_seq_read_nolock(&clear_seq); + dumper->next_seq = prb_next_seq(prb); + } + diff --git a/kernel/patches-5.11.x-rt/0027-0008-printk-use-atomic64_t-for-devkmsg_user.seq.patch b/kernel/patches-5.11.x-rt/0027-0008-printk-use-atomic64_t-for-devkmsg_user.seq.patch new file mode 100644 index 000000000..75cc7499e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0027-0008-printk-use-atomic64_t-for-devkmsg_user.seq.patch @@ -0,0 +1,105 @@ +From: John Ogness +Date: Thu, 10 Dec 2020 15:33:40 +0106 +Subject: [PATCH 08/28] printk: use atomic64_t for devkmsg_user.seq + +@user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock +is removed, @user->seq will be no longer safe from an atomicity point +of view. + +In preparation for the removal of @logbuf_lock, change it to +atomic64_t to provide this safety. 
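+
+(Editor's sketch of the underlying issue, as a user-space analogue in
+C11 rather than kernel code: on 32-bit targets a plain 64-bit load or
+store may be split into two 32-bit accesses, so a concurrent reader can
+observe a torn value. An atomic 64-bit type makes each individual read
+and write indivisible, which is the only guarantee @user->seq needs
+here.)
+
+  #include <stdatomic.h>
+  #include <stdint.h>
+
+  static _Atomic uint64_t seq;    /* stands in for devkmsg_user.seq */
+
+  void update_seq(uint64_t new_seq)
+  {
+          /* Indivisible 64-bit store, even on 32-bit machines. */
+          atomic_store_explicit(&seq, new_seq, memory_order_relaxed);
+  }
+
+  uint64_t read_seq(void)
+  {
+          /* Never observes half of an in-flight update. */
+          return atomic_load_explicit(&seq, memory_order_relaxed);
+  }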
+ +Signed-off-by: John Ogness +--- + kernel/printk/printk.c | 22 +++++++++++----------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -662,7 +662,7 @@ static ssize_t msg_print_ext_body(char * + + /* /dev/kmsg - userspace message inject/listen interface */ + struct devkmsg_user { +- u64 seq; ++ atomic64_t seq; + struct ratelimit_state rs; + struct mutex lock; + char buf[CONSOLE_EXT_LOG_MAX]; +@@ -763,7 +763,7 @@ static ssize_t devkmsg_read(struct file + return ret; + + logbuf_lock_irq(); +- if (!prb_read_valid(prb, user->seq, r)) { ++ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + logbuf_unlock_irq(); +@@ -772,15 +772,15 @@ static ssize_t devkmsg_read(struct file + + logbuf_unlock_irq(); + ret = wait_event_interruptible(log_wait, +- prb_read_valid(prb, user->seq, r)); ++ prb_read_valid(prb, atomic64_read(&user->seq), r)); + if (ret) + goto out; + logbuf_lock_irq(); + } + +- if (r->info->seq != user->seq) { ++ if (r->info->seq != atomic64_read(&user->seq)) { + /* our last seen message is gone, return error and reset */ +- user->seq = r->info->seq; ++ atomic64_set(&user->seq, r->info->seq); + ret = -EPIPE; + logbuf_unlock_irq(); + goto out; +@@ -791,7 +791,7 @@ static ssize_t devkmsg_read(struct file + &r->text_buf[0], r->info->text_len, + &r->info->dev_info); + +- user->seq = r->info->seq + 1; ++ atomic64_set(&user->seq, r->info->seq + 1); + logbuf_unlock_irq(); + + if (len > count) { +@@ -831,7 +831,7 @@ static loff_t devkmsg_llseek(struct file + switch (whence) { + case SEEK_SET: + /* the first record */ +- user->seq = prb_first_valid_seq(prb); ++ atomic64_set(&user->seq, prb_first_valid_seq(prb)); + break; + case SEEK_DATA: + /* +@@ -839,11 +839,11 @@ static loff_t devkmsg_llseek(struct file + * like issued by 'dmesg -c'. Reading /dev/kmsg itself + * changes no global state, and does not clear anything. + */ +- user->seq = latched_seq_read_nolock(&clear_seq); ++ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq)); + break; + case SEEK_END: + /* after the last record */ +- user->seq = prb_next_seq(prb); ++ atomic64_set(&user->seq, prb_next_seq(prb)); + break; + default: + ret = -EINVAL; +@@ -866,7 +866,7 @@ static __poll_t devkmsg_poll(struct file + logbuf_lock_irq(); + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { + /* return error when data has vanished underneath us */ +- if (info.seq != user->seq) ++ if (info.seq != atomic64_read(&user->seq)) + ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; + else + ret = EPOLLIN|EPOLLRDNORM; +@@ -905,7 +905,7 @@ static int devkmsg_open(struct inode *in + &user->text_buf[0], sizeof(user->text_buf)); + + logbuf_lock_irq(); +- user->seq = prb_first_valid_seq(prb); ++ atomic64_set(&user->seq, prb_first_valid_seq(prb)); + logbuf_unlock_irq(); + + file->private_data = user; diff --git a/kernel/patches-5.11.x-rt/0028-0009-printk-add-syslog_lock.patch b/kernel/patches-5.11.x-rt/0028-0009-printk-add-syslog_lock.patch new file mode 100644 index 000000000..495230980 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0028-0009-printk-add-syslog_lock.patch @@ -0,0 +1,152 @@ +From: John Ogness +Date: Thu, 10 Dec 2020 16:58:02 +0106 +Subject: [PATCH 09/28] printk: add syslog_lock + +The global variables @syslog_seq, @syslog_partial, @syslog_time +and write access to @clear_seq are protected by @logbuf_lock. +Once @logbuf_lock is removed, these variables will need their +own synchronization method. 
Introduce @syslog_lock for this +purpose. + +@syslog_lock is a raw_spin_lock for now. This simplifies the +transition to removing @logbuf_lock. Once @logbuf_lock and the +safe buffers are removed, @syslog_lock can change to spin_lock. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++++++++++---- + 1 file changed, 37 insertions(+), 4 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); + printk_safe_exit_irqrestore(flags); \ + } while (0) + ++/* syslog_lock protects syslog_* variables and write access to clear_seq. */ ++static DEFINE_RAW_SPINLOCK(syslog_lock); ++ + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); ++/* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ + static u64 syslog_seq; + static size_t syslog_partial; +@@ -410,7 +414,7 @@ struct latched_seq { + /* + * The next printk record to read after the last 'clear' command. There are + * two copies (updated with seqcount_latch) so that reads can locklessly +- * access a valid value. Writers are synchronized by @logbuf_lock. ++ * access a valid value. Writers are synchronized by @syslog_lock. + */ + static struct latched_seq clear_seq = { + .latch = SEQCNT_LATCH_ZERO(clear_seq.latch), +@@ -470,7 +474,7 @@ bool printk_percpu_data_ready(void) + return __printk_percpu_data_ready; + } + +-/* Must be called under logbuf_lock. */ ++/* Must be called under syslog_lock. */ + static void latched_seq_write(struct latched_seq *ls, u64 val) + { + raw_write_seqcount_latch(&ls->latch); +@@ -1529,7 +1533,9 @@ static int syslog_print(char __user *buf + size_t skip; + + logbuf_lock_irq(); ++ raw_spin_lock(&syslog_lock); + if (!prb_read_valid(prb, syslog_seq, &r)) { ++ raw_spin_unlock(&syslog_lock); + logbuf_unlock_irq(); + break; + } +@@ -1559,6 +1565,7 @@ static int syslog_print(char __user *buf + syslog_partial += n; + } else + n = 0; ++ raw_spin_unlock(&syslog_lock); + logbuf_unlock_irq(); + + if (!n) +@@ -1625,8 +1632,11 @@ static int syslog_print_all(char __user + break; + } + +- if (clear) ++ if (clear) { ++ raw_spin_lock(&syslog_lock); + latched_seq_write(&clear_seq, seq); ++ raw_spin_unlock(&syslog_lock); ++ } + logbuf_unlock_irq(); + + kfree(text); +@@ -1636,10 +1646,24 @@ static int syslog_print_all(char __user + static void syslog_clear(void) + { + logbuf_lock_irq(); ++ raw_spin_lock(&syslog_lock); + latched_seq_write(&clear_seq, prb_next_seq(prb)); ++ raw_spin_unlock(&syslog_lock); + logbuf_unlock_irq(); + } + ++/* Return a consistent copy of @syslog_seq. 
*/ ++static u64 read_syslog_seq_irq(void) ++{ ++ u64 seq; ++ ++ raw_spin_lock_irq(&syslog_lock); ++ seq = syslog_seq; ++ raw_spin_unlock_irq(&syslog_lock); ++ ++ return seq; ++} ++ + int do_syslog(int type, char __user *buf, int len, int source) + { + struct printk_info info; +@@ -1663,8 +1687,9 @@ int do_syslog(int type, char __user *buf + return 0; + if (!access_ok(buf, len)) + return -EFAULT; ++ + error = wait_event_interruptible(log_wait, +- prb_read_valid(prb, syslog_seq, NULL)); ++ prb_read_valid(prb, read_syslog_seq_irq(), NULL)); + if (error) + return error; + error = syslog_print(buf, len); +@@ -1713,8 +1738,10 @@ int do_syslog(int type, char __user *buf + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: + logbuf_lock_irq(); ++ raw_spin_lock(&syslog_lock); + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ ++ raw_spin_unlock(&syslog_lock); + logbuf_unlock_irq(); + return 0; + } +@@ -1743,6 +1770,7 @@ int do_syslog(int type, char __user *buf + } + error -= syslog_partial; + } ++ raw_spin_unlock(&syslog_lock); + logbuf_unlock_irq(); + break; + /* Size of the log buffer */ +@@ -2992,7 +3020,12 @@ void register_console(struct console *ne + */ + exclusive_console = newcon; + exclusive_console_stop_seq = console_seq; ++ ++ /* Get a consistent copy of @syslog_seq. */ ++ raw_spin_lock(&syslog_lock); + console_seq = syslog_seq; ++ raw_spin_unlock(&syslog_lock); ++ + logbuf_unlock_irqrestore(flags); + } + console_unlock(); diff --git a/kernel/patches-5.11.x-rt/0029-0010-printk-introduce-a-kmsg_dump-iterator.patch b/kernel/patches-5.11.x-rt/0029-0010-printk-introduce-a-kmsg_dump-iterator.patch new file mode 100644 index 000000000..d2017fb55 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0029-0010-printk-introduce-a-kmsg_dump-iterator.patch @@ -0,0 +1,535 @@ +From: John Ogness +Date: Fri, 18 Dec 2020 11:40:08 +0000 +Subject: [PATCH 10/28] printk: introduce a kmsg_dump iterator + +Rather than store the iterator information into the registered +kmsg_dump structure, create a separate iterator structure. The +kmsg_dump_iter structure can reside on the stack of the caller, +thus allowing lockless use of the kmsg_dump functions. + +This is in preparation for removal of @logbuf_lock. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/kernel/nvram_64.c | 12 +++-- + arch/powerpc/platforms/powernv/opal-kmsg.c | 3 - + arch/powerpc/xmon/xmon.c | 6 +- + arch/um/kernel/kmsg_dump.c | 5 +- + drivers/hv/vmbus_drv.c | 5 +- + drivers/mtd/mtdoops.c | 5 +- + fs/pstore/platform.c | 5 +- + include/linux/kmsg_dump.h | 43 ++++++++++--------- + kernel/debug/kdb/kdb_main.c | 10 ++-- + kernel/printk/printk.c | 65 +++++++++++++---------------- + 10 files changed, 84 insertions(+), 75 deletions(-) + +--- a/arch/powerpc/kernel/nvram_64.c ++++ b/arch/powerpc/kernel/nvram_64.c +@@ -73,7 +73,8 @@ static const char *nvram_os_partitions[] + }; + + static void oops_to_nvram(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason); ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter); + + static struct kmsg_dumper nvram_kmsg_dumper = { + .dump = oops_to_nvram +@@ -643,7 +644,8 @@ void __init nvram_init_oops_partition(in + * partition. If that's too much, go back and capture uncompressed text. 
+ */ + static void oops_to_nvram(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + static unsigned int oops_count = 0; +@@ -681,13 +683,13 @@ static void oops_to_nvram(struct kmsg_du + return; + + if (big_oops_buf) { +- kmsg_dump_get_buffer(dumper, false, ++ kmsg_dump_get_buffer(iter, false, + big_oops_buf, big_oops_buf_sz, &text_len); + rc = zip_oops(text_len); + } + if (rc != 0) { +- kmsg_dump_rewind(dumper); +- kmsg_dump_get_buffer(dumper, false, ++ kmsg_dump_rewind(iter); ++ kmsg_dump_get_buffer(iter, false, + oops_data, oops_data_sz, &text_len); + err_type = ERR_TYPE_KERNEL_PANIC; + oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION); +--- a/arch/powerpc/platforms/powernv/opal-kmsg.c ++++ b/arch/powerpc/platforms/powernv/opal-kmsg.c +@@ -20,7 +20,8 @@ + * message, it just ensures that OPAL completely flushes the console buffer. + */ + static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + /* + * Outside of a panic context the pollers will continue to run, +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -3005,7 +3005,7 @@ print_address(unsigned long addr) + static void + dump_log_buf(void) + { +- struct kmsg_dumper dumper = { .active = 1 }; ++ struct kmsg_dumper_iter iter = { .active = 1 }; + unsigned char buf[128]; + size_t len; + +@@ -3017,9 +3017,9 @@ dump_log_buf(void) + catch_memory_errors = 1; + sync(); + +- kmsg_dump_rewind_nolock(&dumper); ++ kmsg_dump_rewind_nolock(&iter); + xmon_start_pagination(); +- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) { ++ while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { + buf[len] = '\0'; + printf("%s", buf); + } +--- a/arch/um/kernel/kmsg_dump.c ++++ b/arch/um/kernel/kmsg_dump.c +@@ -7,7 +7,8 @@ + #include + + static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + static char line[1024]; + struct console *con; +@@ -30,7 +31,7 @@ static void kmsg_dumper_stdout(struct km + return; + + printf("kmsg_dump:\n"); +- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) { ++ while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) { + line[len] = '\0'; + printf("%s", line); + } +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -1362,7 +1362,8 @@ static void vmbus_isr(void) + * buffer and call into Hyper-V to transfer the data. + */ + static void hv_kmsg_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + size_t bytes_written; + phys_addr_t panic_pa; +@@ -1377,7 +1378,7 @@ static void hv_kmsg_dump(struct kmsg_dum + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. 
+ */ +- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE, ++ kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE, + &bytes_written); + if (bytes_written) + hyperv_report_panic_msg(panic_pa, bytes_written); +--- a/drivers/mtd/mtdoops.c ++++ b/drivers/mtd/mtdoops.c +@@ -267,7 +267,8 @@ static void find_next_position(struct mt + } + + static void mtdoops_do_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + struct mtdoops_context *cxt = container_of(dumper, + struct mtdoops_context, dump); +@@ -276,7 +277,7 @@ static void mtdoops_do_dump(struct kmsg_ + if (reason == KMSG_DUMP_OOPS && !dump_oops) + return; + +- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, ++ kmsg_dump_get_buffer(iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, + record_size - MTDOOPS_HEADER_SIZE, NULL); + + if (reason != KMSG_DUMP_OOPS) { +--- a/fs/pstore/platform.c ++++ b/fs/pstore/platform.c +@@ -383,7 +383,8 @@ void pstore_record_init(struct pstore_re + * end of the buffer. + */ + static void pstore_dump(struct kmsg_dumper *dumper, +- enum kmsg_dump_reason reason) ++ enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter) + { + unsigned long total = 0; + const char *why; +@@ -435,7 +436,7 @@ static void pstore_dump(struct kmsg_dump + dst_size -= header_size; + + /* Write dump contents. */ +- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size, ++ if (!kmsg_dump_get_buffer(iter, true, dst + header_size, + dst_size, &dump_size)) + break; + +--- a/include/linux/kmsg_dump.h ++++ b/include/linux/kmsg_dump.h +@@ -30,43 +30,48 @@ enum kmsg_dump_reason { + }; + + /** ++ * struct kmsg_dumper_iter - iterator for kernel crash message dumper ++ * @active: Flag that specifies if this is currently dumping ++ * @cur_seq: Points to the oldest message to dump (private) ++ * @next_seq: Points after the newest message to dump (private) ++ */ ++struct kmsg_dumper_iter { ++ bool active; ++ u64 cur_seq; ++ u64 next_seq; ++}; ++ ++/** + * struct kmsg_dumper - kernel crash message dumper structure + * @list: Entry in the dumper list (private) + * @dump: Call into dumping code which will retrieve the data with + * through the record iterator + * @max_reason: filter for highest reason number that should be dumped + * @registered: Flag that specifies if this is already registered +- * @active: Flag that specifies if this is currently dumping +- * @cur_seq: Points to the oldest message to dump (private) +- * @next_seq: Points after the newest message to dump (private) + */ + struct kmsg_dumper { + struct list_head list; +- void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); ++ void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, ++ struct kmsg_dumper_iter *iter); + enum kmsg_dump_reason max_reason; +- bool active; + bool registered; +- +- /* private state of the kmsg iterator */ +- u64 cur_seq; +- u64 next_seq; + }; + + #ifdef CONFIG_PRINTK + void kmsg_dump(enum kmsg_dump_reason reason); + +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len); + +-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len); + +-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++bool 
kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len_out); + +-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); ++void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter); + +-void kmsg_dump_rewind(struct kmsg_dumper *dumper); ++void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter); + + int kmsg_dump_register(struct kmsg_dumper *dumper); + +@@ -78,30 +83,30 @@ static inline void kmsg_dump(enum kmsg_d + { + } + +-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, ++static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, + bool syslog, const char *line, + size_t size, size_t *len) + { + return false; + } + +-static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + const char *line, size_t size, size_t *len) + { + return false; + } + +-static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++static inline bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len) + { + return false; + } + +-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) ++static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter) + { + } + +-static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) ++static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) + { + } + +--- a/kernel/debug/kdb/kdb_main.c ++++ b/kernel/debug/kdb/kdb_main.c +@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const cha + int adjust = 0; + int n = 0; + int skip = 0; +- struct kmsg_dumper dumper = { .active = 1 }; ++ struct kmsg_dumper_iter iter = { .active = 1 }; + size_t len; + char buf[201]; + +@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha + kdb_set(2, setargs); + } + +- kmsg_dump_rewind_nolock(&dumper); +- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL)) ++ kmsg_dump_rewind_nolock(&iter); ++ while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) + n++; + + if (lines < 0) { +@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha + if (skip >= n || skip < 0) + return 0; + +- kmsg_dump_rewind_nolock(&dumper); +- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) { ++ kmsg_dump_rewind_nolock(&iter); ++ while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { + if (skip) { + skip--; + continue; +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3389,6 +3389,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); + */ + void kmsg_dump(enum kmsg_dump_reason reason) + { ++ struct kmsg_dumper_iter iter; + struct kmsg_dumper *dumper; + unsigned long flags; + +@@ -3408,25 +3409,21 @@ void kmsg_dump(enum kmsg_dump_reason rea + continue; + + /* initialize iterator with data about the stored records */ +- dumper->active = true; +- ++ iter.active = true; + logbuf_lock_irqsave(flags); +- dumper->cur_seq = latched_seq_read_nolock(&clear_seq); +- dumper->next_seq = prb_next_seq(prb); ++ iter.cur_seq = latched_seq_read_nolock(&clear_seq); ++ iter.next_seq = prb_next_seq(prb); + logbuf_unlock_irqrestore(flags); + + /* invoke dumper which will iterate over records */ +- dumper->dump(dumper, reason); +- +- /* reset iterator */ +- dumper->active = false; ++ dumper->dump(dumper, reason, &iter); + } + rcu_read_unlock(); + } + + /** + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) +- * @dumper: registered kmsg dumper ++ * 
@iter: kmsg dumper iterator + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer +@@ -3443,7 +3440,7 @@ void kmsg_dump(enum kmsg_dump_reason rea + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks. + */ +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len) + { + struct printk_info info; +@@ -3454,16 +3451,16 @@ bool kmsg_dump_get_line_nolock(struct km + + prb_rec_init_rd(&r, &info, line, size); + +- if (!dumper->active) ++ if (!iter->active) + goto out; + + /* Read text or count text lines? */ + if (line) { +- if (!prb_read_valid(prb, dumper->cur_seq, &r)) ++ if (!prb_read_valid(prb, iter->cur_seq, &r)) + goto out; + l = record_print_text(&r, syslog, printk_time); + } else { +- if (!prb_read_valid_info(prb, dumper->cur_seq, ++ if (!prb_read_valid_info(prb, iter->cur_seq, + &info, &line_count)) { + goto out; + } +@@ -3472,7 +3469,7 @@ bool kmsg_dump_get_line_nolock(struct km + + } + +- dumper->cur_seq = r.info->seq + 1; ++ iter->cur_seq = r.info->seq + 1; + ret = true; + out: + if (len) +@@ -3482,7 +3479,7 @@ bool kmsg_dump_get_line_nolock(struct km + + /** + * kmsg_dump_get_line - retrieve one kmsg log line +- * @dumper: registered kmsg dumper ++ * @iter: kmsg dumper iterator + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer +@@ -3497,14 +3494,14 @@ bool kmsg_dump_get_line_nolock(struct km + * A return value of FALSE indicates that there are no more records to + * read. + */ +-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len) + { + unsigned long flags; + bool ret; + + logbuf_lock_irqsave(flags); +- ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); ++ ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); + logbuf_unlock_irqrestore(flags); + + return ret; +@@ -3513,7 +3510,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + + /** + * kmsg_dump_get_buffer - copy kmsg log lines +- * @dumper: registered kmsg dumper ++ * @iter: kmsg dumper iterator + * @syslog: include the "<4>" prefixes + * @buf: buffer to copy the line to + * @size: maximum size of the buffer +@@ -3530,7 +3527,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + * A return value of FALSE indicates that there are no more records to + * read. 
+ */ +-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, ++bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len_out) + { + struct printk_info info; +@@ -3542,19 +3539,19 @@ bool kmsg_dump_get_buffer(struct kmsg_du + bool ret = false; + bool time = printk_time; + +- if (!dumper->active || !buf || !size) ++ if (!iter->active || !buf || !size) + goto out; + + logbuf_lock_irqsave(flags); +- if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { +- if (info.seq != dumper->cur_seq) { ++ if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { ++ if (info.seq != iter->cur_seq) { + /* messages are gone, move to first available one */ +- dumper->cur_seq = info.seq; ++ iter->cur_seq = info.seq; + } + } + + /* last entry */ +- if (dumper->cur_seq >= dumper->next_seq) { ++ if (iter->cur_seq >= iter->next_seq) { + logbuf_unlock_irqrestore(flags); + goto out; + } +@@ -3565,7 +3562,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + * because this function (by way of record_print_text()) will + * not write more than size-1 bytes of text into @buf. + */ +- seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq, ++ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, + size - 1, syslog, time); + + /* +@@ -3578,7 +3575,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + + len = 0; + prb_for_each_record(seq, prb, seq, &r) { +- if (r.info->seq >= dumper->next_seq) ++ if (r.info->seq >= iter->next_seq) + break; + + len += record_print_text(&r, syslog, time); +@@ -3587,7 +3584,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + prb_rec_init_rd(&r, &info, buf + len, size - len); + } + +- dumper->next_seq = next_seq; ++ iter->next_seq = next_seq; + ret = true; + logbuf_unlock_irqrestore(flags); + out: +@@ -3599,7 +3596,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + + /** + * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) +- * @dumper: registered kmsg dumper ++ * @iter: kmsg dumper iterator + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple +@@ -3607,26 +3604,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + * + * The function is similar to kmsg_dump_rewind(), but grabs no locks. + */ +-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) ++void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter) + { +- dumper->cur_seq = latched_seq_read_nolock(&clear_seq); +- dumper->next_seq = prb_next_seq(prb); ++ iter->cur_seq = latched_seq_read_nolock(&clear_seq); ++ iter->next_seq = prb_next_seq(prb); + } + + /** + * kmsg_dump_rewind - reset the iterator +- * @dumper: registered kmsg dumper ++ * @iter: kmsg dumper iterator + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. 
+ */ +-void kmsg_dump_rewind(struct kmsg_dumper *dumper) ++void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) + { + unsigned long flags; + + logbuf_lock_irqsave(flags); +- kmsg_dump_rewind_nolock(dumper); ++ kmsg_dump_rewind_nolock(iter); + logbuf_unlock_irqrestore(flags); + } + EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/patches-5.11.x-rt/0030-0011-um-synchronize-kmsg_dumper.patch b/kernel/patches-5.11.x-rt/0030-0011-um-synchronize-kmsg_dumper.patch new file mode 100644 index 000000000..08c8b97b2 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0030-0011-um-synchronize-kmsg_dumper.patch @@ -0,0 +1,54 @@ +From: John Ogness +Date: Mon, 21 Dec 2020 11:10:03 +0106 +Subject: [PATCH 11/28] um: synchronize kmsg_dumper + +The kmsg_dumper can be called from any context and CPU, possibly +from multiple CPUs simultaneously. Since a static buffer is used +to retrieve the kernel logs, this buffer must be protected against +simultaneous dumping. + +Cc: Richard Weinberger +Signed-off-by: John Ogness +Reviewed-by: Petr Mladek +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/um/kernel/kmsg_dump.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/arch/um/kernel/kmsg_dump.c ++++ b/arch/um/kernel/kmsg_dump.c +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + #include ++#include + #include + #include + #include +@@ -10,8 +11,10 @@ static void kmsg_dumper_stdout(struct km + enum kmsg_dump_reason reason, + struct kmsg_dumper_iter *iter) + { ++ static DEFINE_SPINLOCK(lock); + static char line[1024]; + struct console *con; ++ unsigned long flags; + size_t len = 0; + + /* only dump kmsg when no console is available */ +@@ -30,11 +33,16 @@ static void kmsg_dumper_stdout(struct km + if (con) + return; + ++ if (!spin_trylock_irqsave(&lock, flags)) ++ return; ++ + printf("kmsg_dump:\n"); + while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) { + line[len] = '\0'; + printf("%s", line); + } ++ ++ spin_unlock_irqrestore(&lock, flags); + } + + static struct kmsg_dumper kmsg_dumper = { diff --git a/kernel/patches-5.11.x-rt/0031-0012-printk-remove-logbuf_lock.patch b/kernel/patches-5.11.x-rt/0031-0012-printk-remove-logbuf_lock.patch new file mode 100644 index 000000000..2e5e2198b --- /dev/null +++ b/kernel/patches-5.11.x-rt/0031-0012-printk-remove-logbuf_lock.patch @@ -0,0 +1,475 @@ +From: John Ogness +Date: Tue, 26 Jan 2021 17:43:19 +0106 +Subject: [PATCH 12/28] printk: remove logbuf_lock + +Since the ringbuffer is lockless, there is no need for it to be +protected by @logbuf_lock. Remove @logbuf_lock. + +This means that printk_nmi_direct and printk_safe_flush_on_panic() +no longer need to acquire any lock to run. + +@console_seq, @exclusive_console_stop_seq, @console_dropped are +protected by @console_lock. 
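+
+As a sketch of the resulting reader pattern (illustrative only; it
+assumes nothing beyond the prb_* calls already used in this file), a
+reader can now walk the ring buffer without taking any lock:
+
+	struct printk_info info;
+	struct printk_record r;
+	char text[CONSOLE_LOG_MAX];
+	u64 seq = 0;
+
+	prb_rec_init_rd(&r, &info, text, sizeof(text));
+	while (prb_read_valid(prb, seq, &r)) {
+		/* consume r.text_buf and r.info here */
+		seq = r.info->seq + 1;
+	}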
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/internal.h | 4 - + kernel/printk/printk.c | 118 ++++++++++++++------------------------------ + kernel/printk/printk_safe.c | 29 ++-------- + 3 files changed, 48 insertions(+), 103 deletions(-) + +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -12,8 +12,6 @@ + + #define PRINTK_NMI_CONTEXT_OFFSET 0x010000000 + +-extern raw_spinlock_t logbuf_lock; +- + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +@@ -59,7 +57,7 @@ void defer_console_output(void); + __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } + + /* +- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem ++ * In !PRINTK builds we still export console_sem + * semaphore and some of console functions (console_unlock()/etc.), so + * printk-safe must preserve the existing local IRQ guarantees. + */ +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -355,41 +355,6 @@ enum log_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + +-/* +- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken +- * within the scheduler's rq lock. It must be released before calling +- * console_unlock() or anything else that might wake up a process. +- */ +-DEFINE_RAW_SPINLOCK(logbuf_lock); +- +-/* +- * Helper macros to lock/unlock logbuf_lock and switch between +- * printk-safe/unsafe modes. +- */ +-#define logbuf_lock_irq() \ +- do { \ +- printk_safe_enter_irq(); \ +- raw_spin_lock(&logbuf_lock); \ +- } while (0) +- +-#define logbuf_unlock_irq() \ +- do { \ +- raw_spin_unlock(&logbuf_lock); \ +- printk_safe_exit_irq(); \ +- } while (0) +- +-#define logbuf_lock_irqsave(flags) \ +- do { \ +- printk_safe_enter_irqsave(flags); \ +- raw_spin_lock(&logbuf_lock); \ +- } while (0) +- +-#define logbuf_unlock_irqrestore(flags) \ +- do { \ +- raw_spin_unlock(&logbuf_lock); \ +- printk_safe_exit_irqrestore(flags); \ +- } while (0) +- + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_RAW_SPINLOCK(syslog_lock); + +@@ -401,6 +366,7 @@ static u64 syslog_seq; + static size_t syslog_partial; + static bool syslog_time; + ++/* All 3 protected by @console_sem. 
*/ + /* the next printk record to write to the console */ + static u64 console_seq; + static u64 exclusive_console_stop_seq; +@@ -766,27 +732,27 @@ static ssize_t devkmsg_read(struct file + if (ret) + return ret; + +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + goto out; + } + +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + ret = wait_event_interruptible(log_wait, + prb_read_valid(prb, atomic64_read(&user->seq), r)); + if (ret) + goto out; +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + } + + if (r->info->seq != atomic64_read(&user->seq)) { + /* our last seen message is gone, return error and reset */ + atomic64_set(&user->seq, r->info->seq); + ret = -EPIPE; +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + goto out; + } + +@@ -796,7 +762,7 @@ static ssize_t devkmsg_read(struct file + &r->info->dev_info); + + atomic64_set(&user->seq, r->info->seq + 1); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + + if (len > count) { + ret = -EINVAL; +@@ -831,7 +797,7 @@ static loff_t devkmsg_llseek(struct file + if (offset) + return -ESPIPE; + +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + switch (whence) { + case SEEK_SET: + /* the first record */ +@@ -852,7 +818,7 @@ static loff_t devkmsg_llseek(struct file + default: + ret = -EINVAL; + } +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + return ret; + } + +@@ -867,15 +833,15 @@ static __poll_t devkmsg_poll(struct file + + poll_wait(file, &log_wait, wait); + +- logbuf_lock_irq(); +- if (prb_read_valid_info(prb, user->seq, &info, NULL)) { ++ printk_safe_enter_irq(); ++ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { + /* return error when data has vanished underneath us */ + if (info.seq != atomic64_read(&user->seq)) + ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; + else + ret = EPOLLIN|EPOLLRDNORM; + } +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + + return ret; + } +@@ -908,9 +874,9 @@ static int devkmsg_open(struct inode *in + prb_rec_init_rd(&user->record, &user->info, + &user->text_buf[0], sizeof(user->text_buf)); + +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + + file->private_data = user; + return 0; +@@ -1532,11 +1498,11 @@ static int syslog_print(char __user *buf + size_t n; + size_t skip; + +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + raw_spin_lock(&syslog_lock); + if (!prb_read_valid(prb, syslog_seq, &r)) { + raw_spin_unlock(&syslog_lock); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + break; + } + if (r.info->seq != syslog_seq) { +@@ -1566,7 +1532,7 @@ static int syslog_print(char __user *buf + } else + n = 0; + raw_spin_unlock(&syslog_lock); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + + if (!n) + break; +@@ -1600,7 +1566,7 @@ static int syslog_print_all(char __user + return -ENOMEM; + + time = printk_time; +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + /* + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. 
+@@ -1621,12 +1587,12 @@ static int syslog_print_all(char __user + break; + } + +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + if (copy_to_user(buf + len, text, textlen)) + len = -EFAULT; + else + len += textlen; +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + + if (len < 0) + break; +@@ -1637,7 +1603,7 @@ static int syslog_print_all(char __user + latched_seq_write(&clear_seq, seq); + raw_spin_unlock(&syslog_lock); + } +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + + kfree(text); + return len; +@@ -1645,11 +1611,11 @@ static int syslog_print_all(char __user + + static void syslog_clear(void) + { +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + raw_spin_lock(&syslog_lock); + latched_seq_write(&clear_seq, prb_next_seq(prb)); + raw_spin_unlock(&syslog_lock); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + } + + /* Return a consistent copy of @syslog_seq. */ +@@ -1737,12 +1703,12 @@ int do_syslog(int type, char __user *buf + break; + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: +- logbuf_lock_irq(); ++ printk_safe_enter_irq(); + raw_spin_lock(&syslog_lock); + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + raw_spin_unlock(&syslog_lock); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + return 0; + } + if (info.seq != syslog_seq) { +@@ -1771,7 +1737,7 @@ int do_syslog(int type, char __user *buf + error -= syslog_partial; + } + raw_spin_unlock(&syslog_lock); +- logbuf_unlock_irq(); ++ printk_safe_exit_irq(); + break; + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: +@@ -2627,7 +2593,6 @@ void console_unlock(void) + size_t len; + + printk_safe_enter_irqsave(flags); +- raw_spin_lock(&logbuf_lock); + skip: + if (!prb_read_valid(prb, console_seq, &r)) + break; +@@ -2671,7 +2636,6 @@ void console_unlock(void) + console_msg_format & MSG_FORMAT_SYSLOG, + printk_time); + console_seq++; +- raw_spin_unlock(&logbuf_lock); + + /* + * While actively printing out messages, if another printk() +@@ -2698,8 +2662,6 @@ void console_unlock(void) + + console_locked = 0; + +- raw_spin_unlock(&logbuf_lock); +- + up_console_sem(); + + /* +@@ -2708,9 +2670,7 @@ void console_unlock(void) + * there's a new owner and the console_unlock() from them will do the + * flush, no worries. + */ +- raw_spin_lock(&logbuf_lock); + retry = prb_read_valid(prb, console_seq, NULL); +- raw_spin_unlock(&logbuf_lock); + printk_safe_exit_irqrestore(flags); + + if (retry && console_trylock()) +@@ -2777,9 +2737,9 @@ void console_flush_on_panic(enum con_flu + if (mode == CONSOLE_REPLAY_ALL) { + unsigned long flags; + +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + console_seq = prb_first_valid_seq(prb); +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + } + console_unlock(); + } +@@ -3008,7 +2968,7 @@ void register_console(struct console *ne + * console_unlock(); will print out the buffered messages + * for us. + */ +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + /* + * We're about to replay the log buffer. 
Only do this to the + * just-registered console to avoid excessive message spam to +@@ -3026,7 +2986,7 @@ void register_console(struct console *ne + console_seq = syslog_seq; + raw_spin_unlock(&syslog_lock); + +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + } + console_unlock(); + console_sysfs_notify(); +@@ -3410,10 +3370,10 @@ void kmsg_dump(enum kmsg_dump_reason rea + + /* initialize iterator with data about the stored records */ + iter.active = true; +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + iter.cur_seq = latched_seq_read_nolock(&clear_seq); + iter.next_seq = prb_next_seq(prb); +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + + /* invoke dumper which will iterate over records */ + dumper->dump(dumper, reason, &iter); +@@ -3500,9 +3460,9 @@ bool kmsg_dump_get_line(struct kmsg_dump + unsigned long flags; + bool ret; + +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + + return ret; + } +@@ -3542,7 +3502,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + if (!iter->active || !buf || !size) + goto out; + +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { + if (info.seq != iter->cur_seq) { + /* messages are gone, move to first available one */ +@@ -3552,7 +3512,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + + /* last entry */ + if (iter->cur_seq >= iter->next_seq) { +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + goto out; + } + +@@ -3586,7 +3546,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du + + iter->next_seq = next_seq; + ret = true; +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + out: + if (len_out) + *len_out = len; +@@ -3622,9 +3582,9 @@ void kmsg_dump_rewind(struct kmsg_dumper + { + unsigned long flags; + +- logbuf_lock_irqsave(flags); ++ printk_safe_enter_irqsave(flags); + kmsg_dump_rewind_nolock(iter); +- logbuf_unlock_irqrestore(flags); ++ printk_safe_exit_irqrestore(flags); + } + EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -16,7 +16,7 @@ + #include "internal.h" + + /* +- * printk() could not take logbuf_lock in NMI context. Instead, ++ * In NMI and safe mode, printk() avoids taking locks. Instead, + * it uses an alternative implementation that temporary stores + * the strings into a per-CPU buffer. The content of the buffer + * is later flushed into the main ring buffer via IRQ work. +@@ -266,18 +266,6 @@ void printk_safe_flush(void) + */ + void printk_safe_flush_on_panic(void) + { +- /* +- * Make sure that we could access the main ring buffer. +- * Do not risk a double release when more CPUs are up. +- */ +- if (raw_spin_is_locked(&logbuf_lock)) { +- if (num_online_cpus() > 1) +- return; +- +- debug_locks_off(); +- raw_spin_lock_init(&logbuf_lock); +- } +- + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; +@@ -319,9 +307,7 @@ void noinstr printk_nmi_exit(void) + * reordering. + * + * It has effect only when called in NMI context. Then printk() +- * will try to store the messages into the main logbuf directly +- * and use the per-CPU buffers only as a fallback when the lock +- * is not available. ++ * will store the messages into the main logbuf directly. 
+ */ + void printk_nmi_direct_enter(void) + { +@@ -376,20 +362,21 @@ void __printk_safe_exit(void) + #endif + + /* +- * Try to use the main logbuf even in NMI. But avoid calling console ++ * Use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ +- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && +- raw_spin_trylock(&logbuf_lock)) { ++ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) { ++ unsigned long flags; + int len; + ++ printk_safe_enter_irqsave(flags); + len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +- raw_spin_unlock(&logbuf_lock); ++ printk_safe_exit_irqrestore(flags); + defer_console_output(); + return len; + } + +- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ ++ /* Use extra buffer in NMI. */ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_nmi(fmt, args); + diff --git a/kernel/patches-5.11.x-rt/0032-0013-printk-kmsg_dump-remove-_nolock-variants.patch b/kernel/patches-5.11.x-rt/0032-0013-printk-kmsg_dump-remove-_nolock-variants.patch new file mode 100644 index 000000000..556dd17d4 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0032-0013-printk-kmsg_dump-remove-_nolock-variants.patch @@ -0,0 +1,213 @@ +From: John Ogness +Date: Mon, 21 Dec 2020 10:27:58 +0106 +Subject: [PATCH 13/28] printk: kmsg_dump: remove _nolock() variants + +kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is +no need for _nolock() variants. Remove these functions and switch all +callers of the _nolock() variants. + +The functions without _nolock() were chosen because they are already +exported to kernel modules. + +Signed-off-by: John Ogness +--- + arch/powerpc/xmon/xmon.c | 4 +- + include/linux/kmsg_dump.h | 18 ------------- + kernel/debug/kdb/kdb_main.c | 8 ++--- + kernel/printk/printk.c | 60 +++++--------------------------------------- + 4 files changed, 15 insertions(+), 75 deletions(-) + +--- a/arch/powerpc/xmon/xmon.c ++++ b/arch/powerpc/xmon/xmon.c +@@ -3017,9 +3017,9 @@ dump_log_buf(void) + catch_memory_errors = 1; + sync(); + +- kmsg_dump_rewind_nolock(&iter); ++ kmsg_dump_rewind(&iter); + xmon_start_pagination(); +- while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) { ++ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) { + buf[len] = '\0'; + printf("%s", buf); + } +--- a/include/linux/kmsg_dump.h ++++ b/include/linux/kmsg_dump.h +@@ -60,18 +60,13 @@ struct kmsg_dumper { + #ifdef CONFIG_PRINTK + void kmsg_dump(enum kmsg_dump_reason reason); + +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog, +- char *line, size_t size, size_t *len); +- + bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + char *line, size_t size, size_t *len); + + bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog, + char *buf, size_t size, size_t *len_out); + +-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter); +- +-void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter); ++void kmsg_dump_rewind(struct kmsg_dumper_iter *iter); + + int kmsg_dump_register(struct kmsg_dumper *dumper); + +@@ -83,13 +78,6 @@ static inline void kmsg_dump(enum kmsg_d + { + } + +-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, +- bool syslog, const char *line, +- size_t size, size_t *len) +-{ +- return false; +-} +- + static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, + const char *line, size_t size, 
size_t *len) + { +@@ -102,10 +90,6 @@ static inline bool kmsg_dump_get_buffer( + return false; + } + +-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter) +-{ +-} +- + static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) + { + } +--- a/kernel/debug/kdb/kdb_main.c ++++ b/kernel/debug/kdb/kdb_main.c +@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha + kdb_set(2, setargs); + } + +- kmsg_dump_rewind_nolock(&iter); +- while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL)) ++ kmsg_dump_rewind(&iter); ++ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL)) + n++; + + if (lines < 0) { +@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha + if (skip >= n || skip < 0) + return 0; + +- kmsg_dump_rewind_nolock(&iter); +- while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) { ++ kmsg_dump_rewind(&iter); ++ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) { + if (skip) { + skip--; + continue; +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3382,7 +3382,7 @@ void kmsg_dump(enum kmsg_dump_reason rea + } + + /** +- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) ++ * kmsg_dump_get_line - retrieve one kmsg log line + * @iter: kmsg dumper iterator + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to +@@ -3397,18 +3397,18 @@ void kmsg_dump(enum kmsg_dump_reason rea + * + * A return value of FALSE indicates that there are no more records to + * read. +- * +- * The function is similar to kmsg_dump_get_line(), but grabs no locks. + */ +-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog, +- char *line, size_t size, size_t *len) ++bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, ++ char *line, size_t size, size_t *len) + { + struct printk_info info; + unsigned int line_count; + struct printk_record r; ++ unsigned long flags; + size_t l = 0; + bool ret = false; + ++ printk_safe_enter_irqsave(flags); + prb_rec_init_rd(&r, &info, line, size); + + if (!iter->active) +@@ -3432,40 +3432,11 @@ bool kmsg_dump_get_line_nolock(struct km + iter->cur_seq = r.info->seq + 1; + ret = true; + out: ++ printk_safe_exit_irqrestore(flags); + if (len) + *len = l; + return ret; + } +- +-/** +- * kmsg_dump_get_line - retrieve one kmsg log line +- * @iter: kmsg dumper iterator +- * @syslog: include the "<4>" prefixes +- * @line: buffer to copy the line to +- * @size: maximum size of the buffer +- * @len: length of line placed into buffer +- * +- * Start at the beginning of the kmsg buffer, with the oldest kmsg +- * record, and copy one record into the provided buffer. +- * +- * Consecutive calls will return the next available record moving +- * towards the end of the buffer with the youngest messages. +- * +- * A return value of FALSE indicates that there are no more records to +- * read. 
+- */ +-bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog, +- char *line, size_t size, size_t *len) +-{ +- unsigned long flags; +- bool ret; +- +- printk_safe_enter_irqsave(flags); +- ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len); +- printk_safe_exit_irqrestore(flags); +- +- return ret; +-} + EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + + /** +@@ -3555,22 +3526,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du + EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + + /** +- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version) +- * @iter: kmsg dumper iterator +- * +- * Reset the dumper's iterator so that kmsg_dump_get_line() and +- * kmsg_dump_get_buffer() can be called again and used multiple +- * times within the same dumper.dump() callback. +- * +- * The function is similar to kmsg_dump_rewind(), but grabs no locks. +- */ +-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter) +-{ +- iter->cur_seq = latched_seq_read_nolock(&clear_seq); +- iter->next_seq = prb_next_seq(prb); +-} +- +-/** + * kmsg_dump_rewind - reset the iterator + * @iter: kmsg dumper iterator + * +@@ -3583,7 +3538,8 @@ void kmsg_dump_rewind(struct kmsg_dumper + unsigned long flags; + + printk_safe_enter_irqsave(flags); +- kmsg_dump_rewind_nolock(iter); ++ iter->cur_seq = latched_seq_read_nolock(&clear_seq); ++ iter->next_seq = prb_next_seq(prb); + printk_safe_exit_irqrestore(flags); + } + EXPORT_SYMBOL_GPL(kmsg_dump_rewind); diff --git a/kernel/patches-5.11.x-rt/0033-0014-printk-kmsg_dump-use-kmsg_dump_rewind.patch b/kernel/patches-5.11.x-rt/0033-0014-printk-kmsg_dump-use-kmsg_dump_rewind.patch new file mode 100644 index 000000000..8d15e8698 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0033-0014-printk-kmsg_dump-use-kmsg_dump_rewind.patch @@ -0,0 +1,35 @@ +From: John Ogness +Date: Wed, 17 Feb 2021 18:23:16 +0100 +Subject: [PATCH 14/28] printk: kmsg_dump: use kmsg_dump_rewind + +kmsg_dump() is open coding the kmsg_dump_rewind(). Call +kmsg_dump_rewind() instead. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3351,7 +3351,6 @@ void kmsg_dump(enum kmsg_dump_reason rea + { + struct kmsg_dumper_iter iter; + struct kmsg_dumper *dumper; +- unsigned long flags; + + rcu_read_lock(); + list_for_each_entry_rcu(dumper, &dump_list, list) { +@@ -3370,10 +3369,7 @@ void kmsg_dump(enum kmsg_dump_reason rea + + /* initialize iterator with data about the stored records */ + iter.active = true; +- printk_safe_enter_irqsave(flags); +- iter.cur_seq = latched_seq_read_nolock(&clear_seq); +- iter.next_seq = prb_next_seq(prb); +- printk_safe_exit_irqrestore(flags); ++ kmsg_dump_rewind(&iter); + + /* invoke dumper which will iterate over records */ + dumper->dump(dumper, reason, &iter); diff --git a/kernel/patches-5.11.x-rt/0034-0015-printk-console-remove-unnecessary-safe-buffer-usage.patch b/kernel/patches-5.11.x-rt/0034-0015-printk-console-remove-unnecessary-safe-buffer-usage.patch new file mode 100644 index 000000000..6f8898206 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0034-0015-printk-console-remove-unnecessary-safe-buffer-usage.patch @@ -0,0 +1,41 @@ +From: John Ogness +Date: Wed, 17 Feb 2021 18:28:05 +0100 +Subject: [PATCH 15/28] printk: console: remove unnecessary safe buffer usage + +Upon registering a console, safe buffers are activated when setting +up the sequence number to replay the log. 
However, these are already +protected by @console_sem and @syslog_lock. Remove the unnecessary +safe buffer usage. + +Signed-off-by: John Ogness +--- + kernel/printk/printk.c | 10 +++------- + 1 file changed, 3 insertions(+), 7 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -2967,9 +2967,7 @@ void register_console(struct console *ne + /* + * console_unlock(); will print out the buffered messages + * for us. +- */ +- printk_safe_enter_irqsave(flags); +- /* ++ * + * We're about to replay the log buffer. Only do this to the + * just-registered console to avoid excessive message spam to + * the already-registered consoles. +@@ -2982,11 +2980,9 @@ void register_console(struct console *ne + exclusive_console_stop_seq = console_seq; + + /* Get a consistent copy of @syslog_seq. */ +- raw_spin_lock(&syslog_lock); ++ raw_spin_lock_irqsave(&syslog_lock, flags); + console_seq = syslog_seq; +- raw_spin_unlock(&syslog_lock); +- +- printk_safe_exit_irqrestore(flags); ++ raw_spin_unlock_irqrestore(&syslog_lock, flags); + } + console_unlock(); + console_sysfs_notify(); diff --git a/kernel/patches-5.11.x-rt/0035-0016-printk-track-limit-recursion.patch b/kernel/patches-5.11.x-rt/0035-0016-printk-track-limit-recursion.patch new file mode 100644 index 000000000..ab6c13a92 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0035-0016-printk-track-limit-recursion.patch @@ -0,0 +1,136 @@ +From: John Ogness +Date: Fri, 11 Dec 2020 00:55:25 +0106 +Subject: [PATCH 16/28] printk: track/limit recursion + +Limit printk() recursion to 1 level. This is enough to print a +stacktrace for the printk call, should a WARN or BUG occur. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 71 insertions(+), 3 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1940,6 +1940,65 @@ static void call_console_drivers(const c + } + } + ++#ifdef CONFIG_PRINTK_NMI ++#define NUM_RECURSION_CTX 2 ++#else ++#define NUM_RECURSION_CTX 1 ++#endif ++ ++struct printk_recursion { ++ char count[NUM_RECURSION_CTX]; ++}; ++ ++static DEFINE_PER_CPU(struct printk_recursion, percpu_printk_recursion); ++static char printk_recursion_count[NUM_RECURSION_CTX]; ++ ++static char *printk_recursion_counter(void) ++{ ++ struct printk_recursion *rec; ++ char *count; ++ ++ if (!printk_percpu_data_ready()) { ++ count = &printk_recursion_count[0]; ++ } else { ++ rec = this_cpu_ptr(&percpu_printk_recursion); ++ ++ count = &rec->count[0]; ++ } ++ ++#ifdef CONFIG_PRINTK_NMI ++ if (in_nmi()) ++ count++; ++#endif ++ ++ return count; ++} ++ ++static bool printk_enter_irqsave(unsigned long *flags) ++{ ++ char *count; ++ ++ local_irq_save(*flags); ++ count = printk_recursion_counter(); ++ /* Only 1 level of recursion allowed. 
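++	 * That one level is enough to report a WARN/BUG hit while printing
++	 * (e.g. to get a stacktrace out); anything deeper is dropped.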
*/ ++ if (*count > 1) { ++ local_irq_restore(*flags); ++ return false; ++ } ++ (*count)++; ++ ++ return true; ++} ++ ++static void printk_exit_irqrestore(unsigned long flags) ++{ ++ char *count; ++ ++ count = printk_recursion_counter(); ++ (*count)--; ++ local_irq_restore(flags); ++} ++ + int printk_delay_msec __read_mostly; + + static inline void printk_delay(void) +@@ -2040,11 +2099,13 @@ int vprintk_store(int facility, int leve + struct prb_reserved_entry e; + enum log_flags lflags = 0; + struct printk_record r; ++ unsigned long irqflags; + u16 trunc_msg_len = 0; + char prefix_buf[8]; + u16 reserve_size; + va_list args2; + u16 text_len; ++ int ret = 0; + u64 ts_nsec; + + /* +@@ -2055,6 +2116,9 @@ int vprintk_store(int facility, int leve + */ + ts_nsec = local_clock(); + ++ if (!printk_enter_irqsave(&irqflags)) ++ return 0; ++ + /* + * The sprintf needs to come first since the syslog prefix might be + * passed in as a parameter. An extra byte must be reserved so that +@@ -2092,7 +2156,8 @@ int vprintk_store(int facility, int leve + prb_commit(&e); + } + +- return text_len; ++ ret = text_len; ++ goto out; + } + } + +@@ -2108,7 +2173,7 @@ int vprintk_store(int facility, int leve + + prb_rec_init_wr(&r, reserve_size + trunc_msg_len); + if (!prb_reserve(&e, prb, &r)) +- return 0; ++ goto out; + } + + /* fill message */ +@@ -2130,7 +2195,10 @@ int vprintk_store(int facility, int leve + else + prb_final_commit(&e); + +- return (text_len + trunc_msg_len); ++ ret = text_len + trunc_msg_len; ++out: ++ printk_exit_irqrestore(irqflags); ++ return ret; + } + + asmlinkage int vprintk_emit(int facility, int level, diff --git a/kernel/patches-5.11.x-rt/0036-0017-printk-remove-safe-buffers.patch b/kernel/patches-5.11.x-rt/0036-0017-printk-remove-safe-buffers.patch new file mode 100644 index 000000000..3834fab49 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0036-0017-printk-remove-safe-buffers.patch @@ -0,0 +1,854 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:00 +0106 +Subject: [PATCH 17/28] printk: remove safe buffers + +With @logbuf_lock removed, the high level printk functions for +storing messages are lockless. Messages can be stored from any +context, so there is no need for the NMI and safe buffers anymore. + +Remove the NMI and safe buffers. In NMI or safe contexts, store +the message immediately but still use irq_work to defer the console +printing. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/kernel/traps.c | 1 + arch/powerpc/kernel/watchdog.c | 5 + include/linux/printk.h | 10 - + kernel/kexec_core.c | 1 + kernel/panic.c | 3 + kernel/printk/internal.h | 2 + kernel/printk/printk.c | 85 +--------- + kernel/printk/printk_safe.c | 329 ----------------------------------------- + lib/nmi_backtrace.c | 6 + 9 files changed, 17 insertions(+), 425 deletions(-) + +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -170,7 +170,6 @@ extern void panic_flush_kmsg_start(void) + + extern void panic_flush_kmsg_end(void) + { +- printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + bust_spinlocks(0); + debug_locks_off(); +--- a/arch/powerpc/kernel/watchdog.c ++++ b/arch/powerpc/kernel/watchdog.c +@@ -181,11 +181,6 @@ static void watchdog_smp_panic(int cpu, + + wd_smp_unlock(&flags); + +- printk_safe_flush(); +- /* +- * printk_safe_flush() seems to require another print +- * before anything actually goes out to console. 
+- */ + if (sysctl_hardlockup_all_cpu_backtrace) + trigger_allbutself_cpu_backtrace(); + +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -207,8 +207,6 @@ void __init setup_log_buf(int early); + void dump_stack_print_info(const char *log_lvl); + void show_regs_print_info(const char *log_lvl); + extern asmlinkage void dump_stack(void) __cold; +-extern void printk_safe_flush(void); +-extern void printk_safe_flush_on_panic(void); + #else + static inline __printf(1, 0) + int vprintk(const char *s, va_list args) +@@ -272,14 +270,6 @@ static inline void show_regs_print_info( + static inline void dump_stack(void) + { + } +- +-static inline void printk_safe_flush(void) +-{ +-} +- +-static inline void printk_safe_flush_on_panic(void) +-{ +-} + #endif + + extern int kptr_restrict; +--- a/kernel/kexec_core.c ++++ b/kernel/kexec_core.c +@@ -977,7 +977,6 @@ void crash_kexec(struct pt_regs *regs) + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); + if (old_cpu == PANIC_CPU_INVALID) { + /* This is the 1st CPU which comes here, so go ahead. */ +- printk_safe_flush_on_panic(); + __crash_kexec(regs); + + /* +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -247,7 +247,6 @@ void panic(const char *fmt, ...) + * Bypass the panic_cpu check and call __crash_kexec directly. + */ + if (!_crash_kexec_post_notifiers) { +- printk_safe_flush_on_panic(); + __crash_kexec(NULL); + + /* +@@ -271,8 +270,6 @@ void panic(const char *fmt, ...) + */ + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + +- /* Call flush even twice. It tries harder with a single online CPU */ +- printk_safe_flush_on_panic(); + kmsg_dump(KMSG_DUMP_PANIC); + + /* +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -23,7 +23,6 @@ int vprintk_store(int facility, int leve + void __printk_safe_enter(void); + void __printk_safe_exit(void); + +-void printk_safe_init(void); + bool printk_percpu_data_ready(void); + + #define printk_safe_enter_irqsave(flags) \ +@@ -67,6 +66,5 @@ void defer_console_output(void); + #define printk_safe_enter_irq() local_irq_disable() + #define printk_safe_exit_irq() local_irq_enable() + +-static inline void printk_safe_init(void) { } + static inline bool printk_percpu_data_ready(void) { return false; } + #endif /* CONFIG_PRINTK */ +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -732,27 +732,22 @@ static ssize_t devkmsg_read(struct file + if (ret) + return ret; + +- printk_safe_enter_irq(); + if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; +- printk_safe_exit_irq(); + goto out; + } + +- printk_safe_exit_irq(); + ret = wait_event_interruptible(log_wait, + prb_read_valid(prb, atomic64_read(&user->seq), r)); + if (ret) + goto out; +- printk_safe_enter_irq(); + } + + if (r->info->seq != atomic64_read(&user->seq)) { + /* our last seen message is gone, return error and reset */ + atomic64_set(&user->seq, r->info->seq); + ret = -EPIPE; +- printk_safe_exit_irq(); + goto out; + } + +@@ -762,7 +757,6 @@ static ssize_t devkmsg_read(struct file + &r->info->dev_info); + + atomic64_set(&user->seq, r->info->seq + 1); +- printk_safe_exit_irq(); + + if (len > count) { + ret = -EINVAL; +@@ -797,7 +791,6 @@ static loff_t devkmsg_llseek(struct file + if (offset) + return -ESPIPE; + +- printk_safe_enter_irq(); + switch (whence) { + case SEEK_SET: + /* the first record */ +@@ -818,7 +811,6 @@ static loff_t devkmsg_llseek(struct file + default: + ret = -EINVAL; + } +- printk_safe_exit_irq(); + return ret; + } + +@@ 
-833,7 +825,6 @@ static __poll_t devkmsg_poll(struct file + + poll_wait(file, &log_wait, wait); + +- printk_safe_enter_irq(); + if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) { + /* return error when data has vanished underneath us */ + if (info.seq != atomic64_read(&user->seq)) +@@ -841,7 +832,6 @@ static __poll_t devkmsg_poll(struct file + else + ret = EPOLLIN|EPOLLRDNORM; + } +- printk_safe_exit_irq(); + + return ret; + } +@@ -874,9 +864,7 @@ static int devkmsg_open(struct inode *in + prb_rec_init_rd(&user->record, &user->info, + &user->text_buf[0], sizeof(user->text_buf)); + +- printk_safe_enter_irq(); + atomic64_set(&user->seq, prb_first_valid_seq(prb)); +- printk_safe_exit_irq(); + + file->private_data = user; + return 0; +@@ -1042,9 +1030,6 @@ static inline void log_buf_add_cpu(void) + + static void __init set_percpu_data_ready(void) + { +- printk_safe_init(); +- /* Make sure we set this flag only after printk_safe() init is done */ +- barrier(); + __printk_percpu_data_ready = true; + } + +@@ -1142,8 +1127,6 @@ void __init setup_log_buf(int early) + new_descs, ilog2(new_descs_count), + new_infos); + +- printk_safe_enter_irqsave(flags); +- + log_buf_len = new_log_buf_len; + log_buf = new_log_buf; + new_log_buf_len = 0; +@@ -1159,8 +1142,6 @@ void __init setup_log_buf(int early) + */ + prb = &printk_rb_dynamic; + +- printk_safe_exit_irqrestore(flags); +- + if (seq != prb_next_seq(&printk_rb_static)) { + pr_err("dropped %llu messages\n", + prb_next_seq(&printk_rb_static) - seq); +@@ -1498,11 +1479,9 @@ static int syslog_print(char __user *buf + size_t n; + size_t skip; + +- printk_safe_enter_irq(); +- raw_spin_lock(&syslog_lock); ++ raw_spin_lock_irq(&syslog_lock); + if (!prb_read_valid(prb, syslog_seq, &r)) { +- raw_spin_unlock(&syslog_lock); +- printk_safe_exit_irq(); ++ raw_spin_unlock_irq(&syslog_lock); + break; + } + if (r.info->seq != syslog_seq) { +@@ -1531,8 +1510,7 @@ static int syslog_print(char __user *buf + syslog_partial += n; + } else + n = 0; +- raw_spin_unlock(&syslog_lock); +- printk_safe_exit_irq(); ++ raw_spin_unlock_irq(&syslog_lock); + + if (!n) + break; +@@ -1566,7 +1544,6 @@ static int syslog_print_all(char __user + return -ENOMEM; + + time = printk_time; +- printk_safe_enter_irq(); + /* + * Find first record that fits, including all following records, + * into the user-provided buffer for this dump. +@@ -1587,23 +1564,20 @@ static int syslog_print_all(char __user + break; + } + +- printk_safe_exit_irq(); + if (copy_to_user(buf + len, text, textlen)) + len = -EFAULT; + else + len += textlen; +- printk_safe_enter_irq(); + + if (len < 0) + break; + } + + if (clear) { +- raw_spin_lock(&syslog_lock); ++ raw_spin_lock_irq(&syslog_lock); + latched_seq_write(&clear_seq, seq); +- raw_spin_unlock(&syslog_lock); ++ raw_spin_unlock_irq(&syslog_lock); + } +- printk_safe_exit_irq(); + + kfree(text); + return len; +@@ -1611,11 +1585,9 @@ static int syslog_print_all(char __user + + static void syslog_clear(void) + { +- printk_safe_enter_irq(); +- raw_spin_lock(&syslog_lock); ++ raw_spin_lock_irq(&syslog_lock); + latched_seq_write(&clear_seq, prb_next_seq(prb)); +- raw_spin_unlock(&syslog_lock); +- printk_safe_exit_irq(); ++ raw_spin_unlock_irq(&syslog_lock); + } + + /* Return a consistent copy of @syslog_seq. 
*/ +@@ -1703,12 +1675,10 @@ int do_syslog(int type, char __user *buf + break; + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: +- printk_safe_enter_irq(); +- raw_spin_lock(&syslog_lock); ++ raw_spin_lock_irq(&syslog_lock); + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ +- raw_spin_unlock(&syslog_lock); +- printk_safe_exit_irq(); ++ raw_spin_unlock_irq(&syslog_lock); + return 0; + } + if (info.seq != syslog_seq) { +@@ -1736,8 +1706,7 @@ int do_syslog(int type, char __user *buf + } + error -= syslog_partial; + } +- raw_spin_unlock(&syslog_lock); +- printk_safe_exit_irq(); ++ raw_spin_unlock_irq(&syslog_lock); + break; + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: +@@ -2207,7 +2176,6 @@ asmlinkage int vprintk_emit(int facility + { + int printed_len; + bool in_sched = false; +- unsigned long flags; + + /* Suppress unimportant messages after panic happens */ + if (unlikely(suppress_printk)) +@@ -2221,9 +2189,7 @@ asmlinkage int vprintk_emit(int facility + boot_delay_msec(level); + printk_delay(); + +- printk_safe_enter_irqsave(flags); + printed_len = vprintk_store(facility, level, dev_info, fmt, args); +- printk_safe_exit_irqrestore(flags); + + /* If called from the scheduler, we can not call up(). */ + if (!in_sched) { +@@ -2615,7 +2581,6 @@ void console_unlock(void) + { + static char ext_text[CONSOLE_EXT_LOG_MAX]; + static char text[CONSOLE_LOG_MAX]; +- unsigned long flags; + bool do_cond_resched, retry; + struct printk_info info; + struct printk_record r; +@@ -2660,7 +2625,6 @@ void console_unlock(void) + size_t ext_len = 0; + size_t len; + +- printk_safe_enter_irqsave(flags); + skip: + if (!prb_read_valid(prb, console_seq, &r)) + break; +@@ -2717,12 +2681,8 @@ void console_unlock(void) + call_console_drivers(ext_text, ext_len, text, len); + start_critical_timings(); + +- if (console_lock_spinning_disable_and_check()) { +- printk_safe_exit_irqrestore(flags); ++ if (console_lock_spinning_disable_and_check()) + return; +- } +- +- printk_safe_exit_irqrestore(flags); + + if (do_cond_resched) + cond_resched(); +@@ -2739,8 +2699,6 @@ void console_unlock(void) + * flush, no worries. 
+ */ + retry = prb_read_valid(prb, console_seq, NULL); +- printk_safe_exit_irqrestore(flags); +- + if (retry && console_trylock()) + goto again; + } +@@ -2802,13 +2760,8 @@ void console_flush_on_panic(enum con_flu + console_trylock(); + console_may_schedule = 0; + +- if (mode == CONSOLE_REPLAY_ALL) { +- unsigned long flags; +- +- printk_safe_enter_irqsave(flags); ++ if (mode == CONSOLE_REPLAY_ALL) + console_seq = prb_first_valid_seq(prb); +- printk_safe_exit_irqrestore(flags); +- } + console_unlock(); + } + +@@ -3464,11 +3417,9 @@ bool kmsg_dump_get_line(struct kmsg_dump + struct printk_info info; + unsigned int line_count; + struct printk_record r; +- unsigned long flags; + size_t l = 0; + bool ret = false; + +- printk_safe_enter_irqsave(flags); + prb_rec_init_rd(&r, &info, line, size); + + if (!iter->active) +@@ -3492,7 +3443,6 @@ bool kmsg_dump_get_line(struct kmsg_dump + iter->cur_seq = r.info->seq + 1; + ret = true; + out: +- printk_safe_exit_irqrestore(flags); + if (len) + *len = l; + return ret; +@@ -3523,7 +3473,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du + { + struct printk_info info; + struct printk_record r; +- unsigned long flags; + u64 seq; + u64 next_seq; + size_t len = 0; +@@ -3533,7 +3482,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du + if (!iter->active || !buf || !size) + goto out; + +- printk_safe_enter_irqsave(flags); + if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { + if (info.seq != iter->cur_seq) { + /* messages are gone, move to first available one */ +@@ -3542,10 +3490,8 @@ bool kmsg_dump_get_buffer(struct kmsg_du + } + + /* last entry */ +- if (iter->cur_seq >= iter->next_seq) { +- printk_safe_exit_irqrestore(flags); ++ if (iter->cur_seq >= iter->next_seq) + goto out; +- } + + /* + * Find first record that fits, including all following records, +@@ -3577,7 +3523,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du + + iter->next_seq = next_seq; + ret = true; +- printk_safe_exit_irqrestore(flags); + out: + if (len_out) + *len_out = len; +@@ -3595,12 +3540,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); + */ + void kmsg_dump_rewind(struct kmsg_dumper_iter *iter) + { +- unsigned long flags; +- +- printk_safe_enter_irqsave(flags); + iter->cur_seq = latched_seq_read_nolock(&clear_seq); + iter->next_seq = prb_next_seq(prb); +- printk_safe_exit_irqrestore(flags); + } + EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -15,282 +15,9 @@ + + #include "internal.h" + +-/* +- * In NMI and safe mode, printk() avoids taking locks. Instead, +- * it uses an alternative implementation that temporary stores +- * the strings into a per-CPU buffer. The content of the buffer +- * is later flushed into the main ring buffer via IRQ work. +- * +- * The alternative implementation is chosen transparently +- * by examining current printk() context mask stored in @printk_context +- * per-CPU variable. +- * +- * The implementation allows to flush the strings also from another CPU. +- * There are situations when we want to make sure that all buffers +- * were handled or when IRQs are blocked. 
+- */ +- +-#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ +- sizeof(atomic_t) - \ +- sizeof(atomic_t) - \ +- sizeof(struct irq_work)) +- +-struct printk_safe_seq_buf { +- atomic_t len; /* length of written data */ +- atomic_t message_lost; +- struct irq_work work; /* IRQ work that flushes the buffer */ +- unsigned char buffer[SAFE_LOG_BUF_LEN]; +-}; +- +-static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); + static DEFINE_PER_CPU(int, printk_context); + +-static DEFINE_RAW_SPINLOCK(safe_read_lock); +- +-#ifdef CONFIG_PRINTK_NMI +-static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +-#endif +- +-/* Get flushed in a more safe context. */ +-static void queue_flush_work(struct printk_safe_seq_buf *s) +-{ +- if (printk_percpu_data_ready()) +- irq_work_queue(&s->work); +-} +- +-/* +- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe +- * have dedicated buffers, because otherwise printk-safe preempted by +- * NMI-printk would have overwritten the NMI messages. +- * +- * The messages are flushed from irq work (or from panic()), possibly, +- * from other CPU, concurrently with printk_safe_log_store(). Should this +- * happen, printk_safe_log_store() will notice the buffer->len mismatch +- * and repeat the write. +- */ +-static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s, +- const char *fmt, va_list args) +-{ +- int add; +- size_t len; +- va_list ap; +- +-again: +- len = atomic_read(&s->len); +- +- /* The trailing '\0' is not counted into len. */ +- if (len >= sizeof(s->buffer) - 1) { +- atomic_inc(&s->message_lost); +- queue_flush_work(s); +- return 0; +- } +- +- /* +- * Make sure that all old data have been read before the buffer +- * was reset. This is not needed when we just append data. +- */ +- if (!len) +- smp_rmb(); +- +- va_copy(ap, args); +- add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap); +- va_end(ap); +- if (!add) +- return 0; +- +- /* +- * Do it once again if the buffer has been flushed in the meantime. +- * Note that atomic_cmpxchg() is an implicit memory barrier that +- * makes sure that the data were written before updating s->len. +- */ +- if (atomic_cmpxchg(&s->len, len, len + add) != len) +- goto again; +- +- queue_flush_work(s); +- return add; +-} +- +-static inline void printk_safe_flush_line(const char *text, int len) +-{ +- /* +- * Avoid any console drivers calls from here, because we may be +- * in NMI or printk_safe context (when in panic). The messages +- * must go only into the ring buffer at this stage. Consoles will +- * get explicitly called later when a crashdump is not generated. +- */ +- printk_deferred("%.*s", len, text); +-} +- +-/* printk part of the temporary buffer line by line */ +-static int printk_safe_flush_buffer(const char *start, size_t len) +-{ +- const char *c, *end; +- bool header; +- +- c = start; +- end = start + len; +- header = true; +- +- /* Print line by line. */ +- while (c < end) { +- if (*c == '\n') { +- printk_safe_flush_line(start, c - start + 1); +- start = ++c; +- header = true; +- continue; +- } +- +- /* Handle continuous lines or missing new line. */ +- if ((c + 1 < end) && printk_get_level(c)) { +- if (header) { +- c = printk_skip_level(c); +- continue; +- } +- +- printk_safe_flush_line(start, c - start); +- start = c++; +- header = true; +- continue; +- } +- +- header = false; +- c++; +- } +- +- /* Check if there was a partial line. Ignore pure header. 
*/ +- if (start < end && !header) { +- static const char newline[] = KERN_CONT "\n"; +- +- printk_safe_flush_line(start, end - start); +- printk_safe_flush_line(newline, strlen(newline)); +- } +- +- return len; +-} +- +-static void report_message_lost(struct printk_safe_seq_buf *s) +-{ +- int lost = atomic_xchg(&s->message_lost, 0); +- +- if (lost) +- printk_deferred("Lost %d message(s)!\n", lost); +-} +- +-/* +- * Flush data from the associated per-CPU buffer. The function +- * can be called either via IRQ work or independently. +- */ +-static void __printk_safe_flush(struct irq_work *work) +-{ +- struct printk_safe_seq_buf *s = +- container_of(work, struct printk_safe_seq_buf, work); +- unsigned long flags; +- size_t len; +- int i; +- +- /* +- * The lock has two functions. First, one reader has to flush all +- * available message to make the lockless synchronization with +- * writers easier. Second, we do not want to mix messages from +- * different CPUs. This is especially important when printing +- * a backtrace. +- */ +- raw_spin_lock_irqsave(&safe_read_lock, flags); +- +- i = 0; +-more: +- len = atomic_read(&s->len); +- +- /* +- * This is just a paranoid check that nobody has manipulated +- * the buffer an unexpected way. If we printed something then +- * @len must only increase. Also it should never overflow the +- * buffer size. +- */ +- if ((i && i >= len) || len > sizeof(s->buffer)) { +- const char *msg = "printk_safe_flush: internal error\n"; +- +- printk_safe_flush_line(msg, strlen(msg)); +- len = 0; +- } +- +- if (!len) +- goto out; /* Someone else has already flushed the buffer. */ +- +- /* Make sure that data has been written up to the @len */ +- smp_rmb(); +- i += printk_safe_flush_buffer(s->buffer + i, len - i); +- +- /* +- * Check that nothing has got added in the meantime and truncate +- * the buffer. Note that atomic_cmpxchg() is an implicit memory +- * barrier that makes sure that the data were copied before +- * updating s->len. +- */ +- if (atomic_cmpxchg(&s->len, len, 0) != len) +- goto more; +- +-out: +- report_message_lost(s); +- raw_spin_unlock_irqrestore(&safe_read_lock, flags); +-} +- +-/** +- * printk_safe_flush - flush all per-cpu nmi buffers. +- * +- * The buffers are flushed automatically via IRQ work. This function +- * is useful only when someone wants to be sure that all buffers have +- * been flushed at some point. +- */ +-void printk_safe_flush(void) +-{ +- int cpu; +- +- for_each_possible_cpu(cpu) { + #ifdef CONFIG_PRINTK_NMI +- __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +-#endif +- __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); +- } +-} +- +-/** +- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system +- * goes down. +- * +- * Similar to printk_safe_flush() but it can be called even in NMI context when +- * the system goes down. It does the best effort to get NMI messages into +- * the main ring buffer. +- * +- * Note that it could try harder when there is only one CPU online. +- */ +-void printk_safe_flush_on_panic(void) +-{ +- if (raw_spin_is_locked(&safe_read_lock)) { +- if (num_online_cpus() > 1) +- return; +- +- debug_locks_off(); +- raw_spin_lock_init(&safe_read_lock); +- } +- +- printk_safe_flush(); +-} +- +-#ifdef CONFIG_PRINTK_NMI +-/* +- * Safe printk() for NMI context. It uses a per-CPU buffer to +- * store the message. NMIs are not nested, so there is always only +- * one writer running. But the buffer might get flushed from another +- * CPU, so we need to be careful. 
+- */ +-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) +-{ +- struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); +- +- return printk_safe_log_store(s, fmt, args); +-} +- + void noinstr printk_nmi_enter(void) + { + this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET); +@@ -305,9 +32,6 @@ void noinstr printk_nmi_exit(void) + * Marks a code that might produce many messages in NMI context + * and the risk of losing them is more critical than eventual + * reordering. +- * +- * It has effect only when called in NMI context. Then printk() +- * will store the messages into the main logbuf directly. + */ + void printk_nmi_direct_enter(void) + { +@@ -320,27 +44,8 @@ void printk_nmi_direct_exit(void) + this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); + } + +-#else +- +-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) +-{ +- return 0; +-} +- + #endif /* CONFIG_PRINTK_NMI */ + +-/* +- * Lock-less printk(), to avoid deadlocks should the printk() recurse +- * into itself. It uses a per-CPU buffer to store the message, just like +- * NMI. +- */ +-static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args) +-{ +- struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); +- +- return printk_safe_log_store(s, fmt, args); +-} +- + /* Can be preempted by NMI. */ + void __printk_safe_enter(void) + { +@@ -365,8 +70,10 @@ void __printk_safe_exit(void) + * Use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ +- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) { +- unsigned long flags; ++ if (this_cpu_read(printk_context) & ++ (PRINTK_NMI_DIRECT_CONTEXT_MASK | ++ PRINTK_NMI_CONTEXT_MASK | ++ PRINTK_SAFE_CONTEXT_MASK)) { + int len; + + printk_safe_enter_irqsave(flags); +@@ -376,34 +83,6 @@ void __printk_safe_exit(void) + return len; + } + +- /* Use extra buffer in NMI. */ +- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) +- return vprintk_nmi(fmt, args); +- +- /* Use extra buffer to prevent a recursion deadlock in safe mode. */ +- if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) +- return vprintk_safe(fmt, args); +- + /* No obstacles. */ + return vprintk_default(fmt, args); + } +- +-void __init printk_safe_init(void) +-{ +- int cpu; +- +- for_each_possible_cpu(cpu) { +- struct printk_safe_seq_buf *s; +- +- s = &per_cpu(safe_print_seq, cpu); +- init_irq_work(&s->work, __printk_safe_flush); +- +-#ifdef CONFIG_PRINTK_NMI +- s = &per_cpu(nmi_print_seq, cpu); +- init_irq_work(&s->work, __printk_safe_flush); +-#endif +- } +- +- /* Flush pending messages that did not have scheduled IRQ works. */ +- printk_safe_flush(); +-} +--- a/lib/nmi_backtrace.c ++++ b/lib/nmi_backtrace.c +@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const + touch_softlockup_watchdog(); + } + +- /* +- * Force flush any remote buffers that might be stuck in IRQ context +- * and therefore could not run their irq_work. 
+- */ +- printk_safe_flush(); +- + clear_bit_unlock(0, &backtrace_flag); + put_cpu(); + } diff --git a/kernel/patches-5.11.x-rt/0037-0018-printk-convert-syslog_lock-to-spin_lock.patch b/kernel/patches-5.11.x-rt/0037-0018-printk-convert-syslog_lock-to-spin_lock.patch new file mode 100644 index 000000000..08bd6ea1f --- /dev/null +++ b/kernel/patches-5.11.x-rt/0037-0018-printk-convert-syslog_lock-to-spin_lock.patch @@ -0,0 +1,112 @@ +From: John Ogness +Date: Thu, 18 Feb 2021 17:37:41 +0100 +Subject: [PATCH 18/28] printk: convert @syslog_lock to spin_lock + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 30 +++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -356,7 +356,7 @@ enum log_flags { + }; + + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ +-static DEFINE_RAW_SPINLOCK(syslog_lock); ++static DEFINE_SPINLOCK(syslog_lock); + + #ifdef CONFIG_PRINTK + DECLARE_WAIT_QUEUE_HEAD(log_wait); +@@ -1479,9 +1479,9 @@ static int syslog_print(char __user *buf + size_t n; + size_t skip; + +- raw_spin_lock_irq(&syslog_lock); ++ spin_lock_irq(&syslog_lock); + if (!prb_read_valid(prb, syslog_seq, &r)) { +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + break; + } + if (r.info->seq != syslog_seq) { +@@ -1510,7 +1510,7 @@ static int syslog_print(char __user *buf + syslog_partial += n; + } else + n = 0; +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + + if (!n) + break; +@@ -1574,9 +1574,9 @@ static int syslog_print_all(char __user + } + + if (clear) { +- raw_spin_lock_irq(&syslog_lock); ++ spin_lock_irq(&syslog_lock); + latched_seq_write(&clear_seq, seq); +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + } + + kfree(text); +@@ -1585,9 +1585,9 @@ static int syslog_print_all(char __user + + static void syslog_clear(void) + { +- raw_spin_lock_irq(&syslog_lock); ++ spin_lock_irq(&syslog_lock); + latched_seq_write(&clear_seq, prb_next_seq(prb)); +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + } + + /* Return a consistent copy of @syslog_seq. */ +@@ -1595,9 +1595,9 @@ static u64 read_syslog_seq_irq(void) + { + u64 seq; + +- raw_spin_lock_irq(&syslog_lock); ++ spin_lock_irq(&syslog_lock); + seq = syslog_seq; +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + + return seq; + } +@@ -1675,10 +1675,10 @@ int do_syslog(int type, char __user *buf + break; + /* Number of chars in the log buffer */ + case SYSLOG_ACTION_SIZE_UNREAD: +- raw_spin_lock_irq(&syslog_lock); ++ spin_lock_irq(&syslog_lock); + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + return 0; + } + if (info.seq != syslog_seq) { +@@ -1706,7 +1706,7 @@ int do_syslog(int type, char __user *buf + } + error -= syslog_partial; + } +- raw_spin_unlock_irq(&syslog_lock); ++ spin_unlock_irq(&syslog_lock); + break; + /* Size of the log buffer */ + case SYSLOG_ACTION_SIZE_BUFFER: +@@ -3001,9 +3001,9 @@ void register_console(struct console *ne + exclusive_console_stop_seq = console_seq; + + /* Get a consistent copy of @syslog_seq. 
*/ +- raw_spin_lock_irqsave(&syslog_lock, flags); ++ spin_lock_irqsave(&syslog_lock, flags); + console_seq = syslog_seq; +- raw_spin_unlock_irqrestore(&syslog_lock, flags); ++ spin_unlock_irqrestore(&syslog_lock, flags); + } + console_unlock(); + console_sysfs_notify(); diff --git a/kernel/patches-5.11.x-rt/0038-0019-console-add-write_atomic-interface.patch b/kernel/patches-5.11.x-rt/0038-0019-console-add-write_atomic-interface.patch new file mode 100644 index 000000000..2c772981c --- /dev/null +++ b/kernel/patches-5.11.x-rt/0038-0019-console-add-write_atomic-interface.patch @@ -0,0 +1,154 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:01 +0106 +Subject: [PATCH 19/28] console: add write_atomic interface + +Add a write_atomic() callback to the console. This is an optional +function for console drivers. The function must be atomic (including +NMI safe) for writing to the console. + +Console drivers must still implement the write() callback. The +write_atomic() callback will only be used in special situations, +such as when the kernel panics. + +Creating an NMI safe write_atomic() that must synchronize with +write() requires a careful implementation of the console driver. To +aid with the implementation, a set of console_atomic_*() functions +are provided: + + void console_atomic_lock(unsigned int *flags); + void console_atomic_unlock(unsigned int flags); + +These functions synchronize using a processor-reentrant spinlock +(called a cpulock). + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 4 + + kernel/printk/printk.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 104 insertions(+) + +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -140,6 +140,7 @@ static inline int con_debug_leave(void) + struct console { + char name[16]; + void (*write)(struct console *, const char *, unsigned); ++ void (*write_atomic)(struct console *co, const char *s, unsigned int count); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +@@ -229,4 +230,7 @@ extern void console_init(void); + void dummycon_register_output_notifier(struct notifier_block *nb); + void dummycon_unregister_output_notifier(struct notifier_block *nb); + ++extern void console_atomic_lock(unsigned int *flags); ++extern void console_atomic_unlock(unsigned int flags); ++ + #endif /* _LINUX_CONSOLE_H */ +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3546,3 +3546,103 @@ void kmsg_dump_rewind(struct kmsg_dumper + EXPORT_SYMBOL_GPL(kmsg_dump_rewind); + + #endif ++ ++struct prb_cpulock { ++ atomic_t owner; ++ unsigned long __percpu *irqflags; ++}; ++ ++#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \ ++static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \ ++static struct prb_cpulock name = { \ ++ .owner = ATOMIC_INIT(-1), \ ++ .irqflags = &_##name##_percpu_irqflags, \ ++} ++ ++static bool __prb_trylock(struct prb_cpulock *cpu_lock, ++ unsigned int *cpu_store) ++{ ++ unsigned long *flags; ++ unsigned int cpu; ++ ++ cpu = get_cpu(); ++ ++ *cpu_store = atomic_read(&cpu_lock->owner); ++ /* memory barrier to ensure the current lock owner is visible */ ++ smp_rmb(); ++ if (*cpu_store == -1) { ++ flags = per_cpu_ptr(cpu_lock->irqflags, cpu); ++ local_irq_save(*flags); ++ if (atomic_try_cmpxchg_acquire(&cpu_lock->owner, ++ cpu_store, cpu)) { ++ return true; ++ } ++ local_irq_restore(*flags); ++ } else if (*cpu_store == cpu) { ++ return true; ++ } 
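++	/*
++	 * The lock is owned by another CPU, or the cmpxchg raced with a
++	 * new owner: drop the CPU reference and report failure so that
++	 * prb_lock() can keep spinning.
++	 */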
++ ++ put_cpu(); ++ return false; ++} ++ ++/* ++ * prb_lock: Perform a processor-reentrant spin lock. ++ * @cpu_lock: A pointer to the lock object. ++ * @cpu_store: A "flags" pointer to store lock status information. ++ * ++ * If no processor has the lock, the calling processor takes the lock and ++ * becomes the owner. If the calling processor is already the owner of the ++ * lock, this function succeeds immediately. If lock is locked by another ++ * processor, this function spins until the calling processor becomes the ++ * owner. ++ * ++ * It is safe to call this function from any context and state. ++ */ ++static void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store) ++{ ++ for (;;) { ++ if (__prb_trylock(cpu_lock, cpu_store)) ++ break; ++ cpu_relax(); ++ } ++} ++ ++/* ++ * prb_unlock: Perform a processor-reentrant spin unlock. ++ * @cpu_lock: A pointer to the lock object. ++ * @cpu_store: A "flags" object storing lock status information. ++ * ++ * Release the lock. The calling processor must be the owner of the lock. ++ * ++ * It is safe to call this function from any context and state. ++ */ ++static void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store) ++{ ++ unsigned long *flags; ++ unsigned int cpu; ++ ++ cpu = atomic_read(&cpu_lock->owner); ++ atomic_set_release(&cpu_lock->owner, cpu_store); ++ ++ if (cpu_store == -1) { ++ flags = per_cpu_ptr(cpu_lock->irqflags, cpu); ++ local_irq_restore(*flags); ++ } ++ ++ put_cpu(); ++} ++ ++DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); ++ ++void console_atomic_lock(unsigned int *flags) ++{ ++ prb_lock(&printk_cpulock, flags); ++} ++EXPORT_SYMBOL(console_atomic_lock); ++ ++void console_atomic_unlock(unsigned int flags) ++{ ++ prb_unlock(&printk_cpulock, flags); ++} ++EXPORT_SYMBOL(console_atomic_unlock); diff --git a/kernel/patches-5.4.x-rt/0037-0020-serial-8250-implement-write_atomic.patch b/kernel/patches-5.11.x-rt/0039-0020-serial-8250-implement-write_atomic.patch similarity index 53% rename from kernel/patches-5.4.x-rt/0037-0020-serial-8250-implement-write_atomic.patch rename to kernel/patches-5.11.x-rt/0039-0020-serial-8250-implement-write_atomic.patch index ea7e56c1a..91750fd28 100644 --- a/kernel/patches-5.4.x-rt/0037-0020-serial-8250-implement-write_atomic.patch +++ b/kernel/patches-5.11.x-rt/0039-0020-serial-8250-implement-write_atomic.patch @@ -1,83 +1,99 @@ From: John Ogness -Date: Tue, 12 Feb 2019 15:29:58 +0100 -Subject: [PATCH 20/25] serial: 8250: implement write_atomic +Date: Mon, 30 Nov 2020 01:42:02 +0106 +Subject: [PATCH 20/28] serial: 8250: implement write_atomic -Implement a non-sleeping NMI-safe write_atomic console function in -order to support emergency printk messages. +Implement a non-sleeping NMI-safe write_atomic() console function in +order to support emergency console printing. Since interrupts need to be disabled during transmit, all usage of -the IER register was wrapped with access functions that use the -console_atomic_lock function to synchronize register access while -tracking the state of the interrupts. This was necessary because -write_atomic is can be calling from an NMI context that has -preempted write_atomic. +the IER register is wrapped with access functions that use the +console_atomic_lock() function to synchronize register access while +tracking the state of the interrupts. This is necessary because +write_atomic() can be called from an NMI context that has preempted +write_atomic(). 
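As a concrete illustration of that pattern, the IER save/mask/restore
sequence can be sketched as below. This is a condensed sketch, not the
literal patch code: port stands for the uart_port of a console UART, and
putc_poll() is a hypothetical helper that busy-waits on the FIFO and
emits one character.

  static void atomic_tx_sketch(struct uart_port *port,
                               const char *s, unsigned int n)
  {
      unsigned int flags, ier, i;

      console_atomic_lock(&flags);          /* processor-reentrant cpulock */
      ier = serial_port_in(port, UART_IER); /* save the current IER */
      serial_port_out(port, UART_IER, 0);   /* mask the UART interrupts */
      for (i = 0; i < n; i++)
          putc_poll(port, s[i]);            /* hypothetical polled output */
      serial_port_out(port, UART_IER, ier); /* restore the saved IER */
      console_atomic_unlock(flags);
  }

Because the cpulock is reentrant on the owning CPU, an NMI that interrupts
this sequence and itself calls write_atomic() can still take the lock and
produce consistent output.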
Signed-off-by: John Ogness Signed-off-by: Sebastian Andrzej Siewior --- - drivers/tty/serial/8250/8250.h | 22 +++++ - drivers/tty/serial/8250/8250_core.c | 19 +++- - drivers/tty/serial/8250/8250_dma.c | 4 - drivers/tty/serial/8250/8250_port.c | 154 ++++++++++++++++++++++++++---------- - include/linux/serial_8250.h | 5 + - 5 files changed, 157 insertions(+), 47 deletions(-) + drivers/tty/serial/8250/8250.h | 47 ++++++++++++++++ + drivers/tty/serial/8250/8250_core.c | 17 ++++-- + drivers/tty/serial/8250/8250_fsl.c | 9 +++ + drivers/tty/serial/8250/8250_ingenic.c | 7 ++ + drivers/tty/serial/8250/8250_mtk.c | 29 +++++++++- + drivers/tty/serial/8250/8250_port.c | 92 ++++++++++++++++++++------------- + include/linux/serial_8250.h | 5 + + 7 files changed, 162 insertions(+), 44 deletions(-) --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h -@@ -96,6 +96,10 @@ struct serial8250_config { - #define SERIAL8250_SHARE_IRQS 0 - #endif - -+void set_ier(struct uart_8250_port *up, unsigned char ier); -+void clear_ier(struct uart_8250_port *up); -+void restore_ier(struct uart_8250_port *up); -+ - #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ - { \ - .iobase = _base, \ -@@ -139,6 +143,15 @@ static inline bool serial8250_set_THRI(s - return true; +@@ -130,12 +130,55 @@ static inline void serial_dl_write(struc + up->dl_write(up, value); } -+static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up) ++static inline void serial8250_set_IER(struct uart_8250_port *up, ++ unsigned char ier) +{ -+ if (up->ier & UART_IER_THRI) -+ return false; -+ up->ier |= UART_IER_THRI; -+ set_ier(up, up->ier); -+ return true; ++ struct uart_port *port = &up->port; ++ unsigned int flags; ++ bool is_console; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ console_atomic_lock(&flags); ++ ++ serial_out(up, UART_IER, ier); ++ ++ if (is_console) ++ console_atomic_unlock(flags); +} + - static inline bool serial8250_clear_THRI(struct uart_8250_port *up) ++static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ unsigned int clearval = 0; ++ unsigned int prior; ++ unsigned int flags; ++ bool is_console; ++ ++ is_console = uart_console(port); ++ ++ if (up->capabilities & UART_CAP_UUE) ++ clearval = UART_IER_UUE; ++ ++ if (is_console) ++ console_atomic_lock(&flags); ++ ++ prior = serial_port_in(port, UART_IER); ++ serial_port_out(port, UART_IER, clearval); ++ ++ if (is_console) ++ console_atomic_unlock(flags); ++ ++ return prior; ++} ++ + static inline bool serial8250_set_THRI(struct uart_8250_port *up) { - if (!(up->ier & UART_IER_THRI)) -@@ -148,6 +161,15 @@ static inline bool serial8250_clear_THRI + if (up->ier & UART_IER_THRI) + return false; + up->ier |= UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); return true; } -+static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up) -+{ -+ if (!(up->ier & UART_IER_THRI)) -+ return false; -+ up->ier &= ~UART_IER_THRI; -+ set_ier(up, up->ier); -+ return true; -+} -+ - struct uart_8250_port *serial8250_get_port(int line); +@@ -144,7 +187,7 @@ static inline bool serial8250_clear_THRI + if (!(up->ier & UART_IER_THRI)) + return false; + up->ier &= ~UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } - void serial8250_rpm_get(struct uart_8250_port *p); --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -265,7 +265,7 @@ static void 
serial8250_timeout(struct ti - static void serial8250_backup_timeout(struct timer_list *t) - { - struct uart_8250_port *up = from_timer(up, t, timer); -- unsigned int iir, ier = 0, lsr; -+ unsigned int iir, lsr; - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); @@ -274,10 +274,8 @@ static void serial8250_backup_timeout(st * Must disable interrupts or else we risk racing with the interrupt * based handler. @@ -87,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior - serial_out(up, UART_IER, 0); - } + if (up->port.irq) -+ clear_ier(up); ++ ier = serial8250_clear_IER(up); iir = serial_in(up, UART_IIR); @@ -96,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (up->port.irq) - serial_out(up, UART_IER, ier); -+ restore_ier(up); ++ serial8250_set_IER(up, ier); spin_unlock_irqrestore(&up->port.lock, flags); @@ -115,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { -@@ -663,6 +669,7 @@ static int univ8250_console_match(struct +@@ -671,6 +677,7 @@ static int univ8250_console_match(struct static struct console univ8250_console = { .name = "ttyS", @@ -123,149 +139,141 @@ Signed-off-by: Sebastian Andrzej Siewior .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, ---- a/drivers/tty/serial/8250/8250_dma.c -+++ b/drivers/tty/serial/8250/8250_dma.c -@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para +--- a/drivers/tty/serial/8250/8250_fsl.c ++++ b/drivers/tty/serial/8250/8250_fsl.c +@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port - ret = serial8250_tx_dma(p); - if (ret) -- serial8250_set_THRI(p); -+ serial8250_set_THRI_sier(p); + /* Stop processing interrupts on input overrun */ + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { ++ unsigned int ca_flags; + unsigned long delay; ++ bool is_console; - spin_unlock_irqrestore(&p->port.lock, flags); ++ is_console = uart_console(port); ++ ++ if (is_console) ++ console_atomic_lock(&ca_flags); + up->ier = port->serial_in(port, UART_IER); ++ if (is_console) ++ console_atomic_unlock(ca_flags); ++ + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +--- a/drivers/tty/serial/8250/8250_ingenic.c ++++ b/drivers/tty/serial/8250/8250_ingenic.c +@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic + + static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) + { ++ unsigned int flags; ++ bool is_console; + int ier; + + switch (offset) { +@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(stru + * If we have enabled modem status IRQs we should enable + * modem mode. 
+ */ ++ is_console = uart_console(p); ++ if (is_console) ++ console_atomic_lock(&flags); + ier = p->serial_in(p, UART_IER); ++ if (is_console) ++ console_atomic_unlock(flags); + + if (ier & UART_IER_MSI) + value |= UART_MCR_MDCE | UART_MCR_FCM; +--- a/drivers/tty/serial/8250/8250_mtk.c ++++ b/drivers/tty/serial/8250/8250_mtk.c +@@ -213,12 +213,37 @@ static void mtk8250_shutdown(struct uart + + static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); ++ struct uart_port *port = &up->port; ++ unsigned int flags; ++ unsigned int ier; ++ bool is_console; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ console_atomic_lock(&flags); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier & (~mask)); ++ ++ if (is_console) ++ console_atomic_unlock(flags); } -@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p - dma_async_issue_pending(dma->txchan); - if (dma->tx_err) { - dma->tx_err = 0; -- serial8250_clear_THRI(p); -+ serial8250_clear_THRI_sier(p); - } - return 0; - err: + + static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); ++ struct uart_port *port = &up->port; ++ unsigned int flags; ++ unsigned int ier; ++ ++ if (uart_console(port)) ++ console_atomic_lock(&flags); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier | mask); ++ ++ if (uart_console(port)) ++ console_atomic_unlock(flags); + } + + static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct +@@ -757,7 +757,7 @@ static void serial8250_set_sleep(struct serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } - serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); -+ set_ier(p, sleep ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(p, sleep ? 
UART_IERX_SLEEP : 0); if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); -@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua +@@ -1429,7 +1429,7 @@ static void serial8250_stop_rx(struct ua up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; - serial_port_out(port, UART_IER, up->ier); -+ set_ier(up, up->ier); ++ serial8250_set_IER(up, up->ier); serial8250_rpm_put(up); } -@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua +@@ -1459,7 +1459,7 @@ void serial8250_em485_stop_tx(struct uar serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; - serial_port_out(&p->port, UART_IER, p->ier); -+ set_ier(p, p->ier); ++ serial8250_set_IER(p, p->ier); } } - static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) -@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_ - - static inline void __do_stop_tx(struct uart_8250_port *p) - { -- if (serial8250_clear_THRI(p)) -+ if (serial8250_clear_THRI_sier(p)) - serial8250_rpm_put_tx(p); - } - -@@ -1509,7 +1509,7 @@ static inline void __start_tx(struct uar - if (up->dma && !up->dma->tx_dma(up)) - return; - -- if (serial8250_set_THRI(up)) { -+ if (serial8250_set_THRI_sier(up)) { - if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr; - -@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct + EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); +@@ -1687,7 +1687,7 @@ static void serial8250_disable_ms(struct mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; - serial_port_out(port, UART_IER, up->ier); -+ set_ier(up, up->ier); ++ serial8250_set_IER(up, up->ier); } static void serial8250_enable_ms(struct uart_port *port) -@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct +@@ -1703,7 +1703,7 @@ static void serial8250_enable_ms(struct up->ier |= UART_IER_MSI; serial8250_rpm_get(up); - serial_port_out(port, UART_IER, up->ier); -+ set_ier(up, up->ier); ++ serial8250_set_IER(up, up->ier); serial8250_rpm_put(up); } -@@ -1991,6 +1991,52 @@ static void wait_for_xmitr(struct uart_8 - } - } - -+static atomic_t ier_counter = ATOMIC_INIT(0); -+static atomic_t ier_value = ATOMIC_INIT(0); -+ -+void set_ier(struct uart_8250_port *up, unsigned char ier) -+{ -+ struct uart_port *port = &up->port; -+ unsigned int flags; -+ -+ console_atomic_lock(&flags); -+ if (atomic_read(&ier_counter) > 0) -+ atomic_set(&ier_value, ier); -+ else -+ serial_port_out(port, UART_IER, ier); -+ console_atomic_unlock(flags); -+} -+ -+void clear_ier(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ unsigned int ier_cleared = 0; -+ unsigned int flags; -+ unsigned int ier; -+ -+ console_atomic_lock(&flags); -+ atomic_inc(&ier_counter); -+ ier = serial_port_in(port, UART_IER); -+ if (up->capabilities & UART_CAP_UUE) -+ ier_cleared = UART_IER_UUE; -+ if (ier != ier_cleared) { -+ serial_port_out(port, UART_IER, ier_cleared); -+ atomic_set(&ier_value, ier); -+ } -+ console_atomic_unlock(flags); -+} -+ -+void restore_ier(struct uart_8250_port *up) -+{ -+ struct uart_port *port = &up->port; -+ unsigned int flags; -+ -+ console_atomic_lock(&flags); -+ if (atomic_fetch_dec(&ier_counter) == 1) -+ serial_port_out(port, UART_IER, atomic_read(&ier_value)); -+ console_atomic_unlock(flags); -+} -+ - #ifdef CONFIG_CONSOLE_POLL - /* - * Console polling routines for writing and reading from the uart while -@@ -2022,18 +2068,10 @@ static int serial8250_get_poll_char(stru - static void 
serial8250_put_poll_char(struct uart_port *port, - unsigned char c) - { -- unsigned int ier; +@@ -2118,14 +2118,7 @@ static void serial8250_put_poll_char(str struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); @@ -277,38 +285,38 @@ Signed-off-by: Sebastian Andrzej Siewior - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); -+ clear_ier(up); ++ ier = serial8250_clear_IER(up); wait_for_xmitr(up, BOTH_EMPTY); /* -@@ -2046,7 +2084,7 @@ static void serial8250_put_poll_char(str +@@ -2138,7 +2131,7 @@ static void serial8250_put_poll_char(str * and restore the IER */ wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); -+ restore_ier(up); ++ serial8250_set_IER(up, ier); serial8250_rpm_put(up); } -@@ -2358,7 +2396,7 @@ void serial8250_do_shutdown(struct uart_ +@@ -2441,7 +2434,7 @@ void serial8250_do_shutdown(struct uart_ */ spin_lock_irqsave(&port->lock, flags); up->ier = 0; - serial_port_out(port, UART_IER, 0); -+ set_ier(up, 0); ++ serial8250_set_IER(up, 0); spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2643,7 +2681,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2771,7 +2764,7 @@ serial8250_do_set_termios(struct uart_po if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; - serial_port_out(port, UART_IER, up->ier); -+ set_ier(up, up->ier); ++ serial8250_set_IER(up, up->ier); if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3107,7 +3145,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3237,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -317,7 +325,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3115,6 +3153,18 @@ static void serial8250_console_putchar(s +@@ -3245,6 +3238,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } @@ -336,7 +344,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Restore serial console when h/w power-off detected */ -@@ -3136,6 +3186,42 @@ static void serial8250_console_restore(s +@@ -3266,6 +3271,32 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } @@ -345,20 +353,13 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + struct uart_port *port = &up->port; + unsigned int flags; -+ bool locked; ++ unsigned int ier; + + console_atomic_lock(&flags); + -+ /* -+ * If possible, keep any other CPUs from working with the -+ * UART until the atomic message is completed. This helps -+ * to keep the output more orderly. -+ */ -+ locked = spin_trylock(&port->lock); -+ + touch_nmi_watchdog(); + -+ clear_ier(up); ++ ier = serial8250_clear_IER(up); + + if (atomic_fetch_inc(&up->console_printing)) { + uart_console_write(port, "\n", 1, @@ -368,10 +369,7 @@ Signed-off-by: Sebastian Andrzej Siewior + atomic_dec(&up->console_printing); + + wait_for_xmitr(up, BOTH_EMPTY); -+ restore_ier(up); -+ -+ if (locked) -+ spin_unlock(&port->lock); ++ serial8250_set_IER(up, ier); + + console_atomic_unlock(flags); +} @@ -379,18 +377,14 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Print a string to the serial port trying not to disturb * any possible real use of the port... 
-@@ -3147,27 +3233,13 @@ void serial8250_console_write(struct uar - { +@@ -3282,24 +3313,12 @@ void serial8250_console_write(struct uar struct uart_port *port = &up->port; unsigned long flags; -- unsigned int ier; + unsigned int ier; - int locked = 1; touch_nmi_watchdog(); - serial8250_rpm_get(up); -+ spin_lock_irqsave(&port->lock, flags); - - if (oops_in_progress) - locked = spin_trylock_irqsave(&port->lock, flags); - else @@ -400,17 +394,18 @@ Signed-off-by: Sebastian Andrzej Siewior - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); -- ++ spin_lock_irqsave(&port->lock, flags); + - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); -+ clear_ier(up); ++ ier = serial8250_clear_IER(up); /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3175,14 +3247,16 @@ void serial8250_console_write(struct uar - up->canary = 0; +@@ -3313,7 +3332,9 @@ void serial8250_console_write(struct uar + mdelay(port->rs485.delay_rts_before_send); } + atomic_inc(&up->console_printing); @@ -419,25 +414,27 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); +@@ -3326,8 +3347,7 @@ void serial8250_console_write(struct uar + if (em485->tx_stopped) + up->rs485_stop_tx(up); + } +- - serial_port_out(port, UART_IER, ier); -+ restore_ier(up); ++ serial8250_set_IER(up, ier); /* * The receive handling will happen properly because the -@@ -3194,8 +3268,7 @@ void serial8250_console_write(struct uar +@@ -3339,8 +3359,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); - if (locked) - spin_unlock_irqrestore(&port->lock, flags); + spin_unlock_irqrestore(&port->lock, flags); - serial8250_rpm_put(up); } -@@ -3216,6 +3289,7 @@ static unsigned int probe_baud(struct ua + static unsigned int probe_baud(struct uart_port *port) +@@ -3360,6 +3379,7 @@ static unsigned int probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -445,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3224,6 +3298,8 @@ int serial8250_console_setup(struct uart +@@ -3369,6 +3389,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; @@ -464,7 +461,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -@@ -123,6 +124,8 @@ struct uart_8250_port { +@@ -125,6 +126,8 @@ struct uart_8250_port { #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA unsigned char msr_saved_flags; @@ -473,12 +470,12 @@ Signed-off-by: Sebastian Andrzej Siewior struct uart_8250_dma *dma; const struct uart_8250_ops *ops; -@@ -174,6 +177,8 @@ void serial8250_init_port(struct uart_82 +@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_82 void serial8250_set_defaults(struct uart_8250_port *up); void serial8250_console_write(struct uart_8250_port *up, const char *s, unsigned int count); +void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, + unsigned int count); int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); - extern void serial8250_set_isa_configurator(void (*v) diff --git 
a/kernel/patches-5.11.x-rt/0040-0021-printk-relocate-printk_delay-and-vprintk_default.patch b/kernel/patches-5.11.x-rt/0040-0021-printk-relocate-printk_delay-and-vprintk_default.patch new file mode 100644 index 000000000..b7818b0f9 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0040-0021-printk-relocate-printk_delay-and-vprintk_default.patch @@ -0,0 +1,82 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:03 +0106 +Subject: [PATCH 21/28] printk: relocate printk_delay() and vprintk_default() + +Move printk_delay() and vprintk_default() "as is" further up so that +they can be used by new functions in an upcoming commit. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 40 ++++++++++++++++++++-------------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1725,6 +1725,20 @@ SYSCALL_DEFINE3(syslog, int, type, char + return do_syslog(type, buf, len, SYSLOG_FROM_READER); + } + ++int printk_delay_msec __read_mostly; ++ ++static inline void printk_delay(void) ++{ ++ if (unlikely(printk_delay_msec)) { ++ int m = printk_delay_msec; ++ ++ while (m--) { ++ mdelay(1); ++ touch_nmi_watchdog(); ++ } ++ } ++} ++ + /* + * Special console_lock variants that help to reduce the risk of soft-lockups. + * They allow to pass console_lock to another printk() call using a busy wait. +@@ -1968,20 +1982,6 @@ static void printk_exit_irqrestore(unsig + local_irq_restore(flags); + } + +-int printk_delay_msec __read_mostly; +- +-static inline void printk_delay(void) +-{ +- if (unlikely(printk_delay_msec)) { +- int m = printk_delay_msec; +- +- while (m--) { +- mdelay(1); +- touch_nmi_watchdog(); +- } +- } +-} +- + static inline u32 printk_caller_id(void) + { + return in_task() ? task_pid_nr(current) : +@@ -2214,18 +2214,18 @@ asmlinkage int vprintk_emit(int facility + } + EXPORT_SYMBOL(vprintk_emit); + +-asmlinkage int vprintk(const char *fmt, va_list args) +-{ +- return vprintk_func(fmt, args); +-} +-EXPORT_SYMBOL(vprintk); +- + int vprintk_default(const char *fmt, va_list args) + { + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); + } + EXPORT_SYMBOL_GPL(vprintk_default); + ++asmlinkage int vprintk(const char *fmt, va_list args) ++{ ++ return vprintk_func(fmt, args); ++} ++EXPORT_SYMBOL(vprintk); ++ + /** + * printk - print a kernel message + * @fmt: format string diff --git a/kernel/patches-5.11.x-rt/0041-0022-printk-combine-boot_delay_msec-into-printk_delay.patch b/kernel/patches-5.11.x-rt/0041-0022-printk-combine-boot_delay_msec-into-printk_delay.patch new file mode 100644 index 000000000..ae89bd228 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0041-0022-printk-combine-boot_delay_msec-into-printk_delay.patch @@ -0,0 +1,37 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:04 +0106 +Subject: [PATCH 22/28] printk: combine boot_delay_msec() into printk_delay() + +boot_delay_msec() is always called immediately before printk_delay() +so just combine the two. 
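The combined helper then has a single call site per message; a sketch of
the resulting behavior (boot_delay_msec() implements the boot_delay=
boot parameter, and printk_delay_msec is the existing kernel.printk_delay
sysctl):

  static void printk_delay_sketch(int level)
  {
      int m = printk_delay_msec;  /* kernel.printk_delay sysctl, in ms */

      boot_delay_msec(level);     /* boot_delay= handling, per loglevel */
      while (m--) {
          mdelay(1);              /* burn 1 ms per configured tick */
          touch_nmi_watchdog();   /* avoid NMI watchdog false positives */
      }
  }

Later patches in this series rely on that single entry point: both the
sync-mode path and the per-console printing threads call
printk_delay(r.info->level) after emitting a record.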
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1727,8 +1727,10 @@ SYSCALL_DEFINE3(syslog, int, type, char + + int printk_delay_msec __read_mostly; + +-static inline void printk_delay(void) ++static inline void printk_delay(int level) + { ++ boot_delay_msec(level); ++ + if (unlikely(printk_delay_msec)) { + int m = printk_delay_msec; + +@@ -2186,8 +2188,7 @@ asmlinkage int vprintk_emit(int facility + in_sched = true; + } + +- boot_delay_msec(level); +- printk_delay(); ++ printk_delay(level); + + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + diff --git a/kernel/patches-5.11.x-rt/0042-0023-printk-change-console_seq-to-atomic64_t.patch b/kernel/patches-5.11.x-rt/0042-0023-printk-change-console_seq-to-atomic64_t.patch new file mode 100644 index 000000000..1e99c3a73 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0042-0023-printk-change-console_seq-to-atomic64_t.patch @@ -0,0 +1,125 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:05 +0106 +Subject: [PATCH 23/28] printk: change @console_seq to atomic64_t + +In preparation for atomic printing, change @console_seq to atomic +so that it can be accessed without requiring @console_sem. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/printk/printk.c | 34 +++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 15 deletions(-) + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -366,12 +366,13 @@ static u64 syslog_seq; + static size_t syslog_partial; + static bool syslog_time; + +-/* All 3 protected by @console_sem. */ +-/* the next printk record to write to the console */ +-static u64 console_seq; ++/* Both protected by @console_sem. */ + static u64 exclusive_console_stop_seq; + static unsigned long console_dropped; + ++/* the next printk record to write to the console */ ++static atomic64_t console_seq = ATOMIC64_INIT(0); ++ + struct latched_seq { + seqcount_latch_t latch; + u64 val[2]; +@@ -2270,7 +2271,7 @@ EXPORT_SYMBOL(printk); + #define prb_first_valid_seq(rb) 0 + + static u64 syslog_seq; +-static u64 console_seq; ++static atomic64_t console_seq = ATOMIC64_INIT(0); + static u64 exclusive_console_stop_seq; + static unsigned long console_dropped; + +@@ -2585,6 +2586,7 @@ void console_unlock(void) + bool do_cond_resched, retry; + struct printk_info info; + struct printk_record r; ++ u64 seq; + + if (console_suspended) { + up_console_sem(); +@@ -2627,12 +2629,14 @@ void console_unlock(void) + size_t len; + + skip: +- if (!prb_read_valid(prb, console_seq, &r)) ++ seq = atomic64_read(&console_seq); ++ if (!prb_read_valid(prb, seq, &r)) + break; + +- if (console_seq != r.info->seq) { +- console_dropped += r.info->seq - console_seq; +- console_seq = r.info->seq; ++ if (seq != r.info->seq) { ++ console_dropped += r.info->seq - seq; ++ atomic64_set(&console_seq, r.info->seq); ++ seq = r.info->seq; + } + + if (suppress_message_printing(r.info->level)) { +@@ -2641,13 +2645,13 @@ void console_unlock(void) + * directly to the console when we received it, and + * record that has level above the console loglevel. + */ +- console_seq++; ++ atomic64_set(&console_seq, seq + 1); + goto skip; + } + + /* Output to all consoles once old messages replayed. 
*/ + if (unlikely(exclusive_console && +- console_seq >= exclusive_console_stop_seq)) { ++ seq >= exclusive_console_stop_seq)) { + exclusive_console = NULL; + } + +@@ -2668,7 +2672,7 @@ void console_unlock(void) + len = record_print_text(&r, + console_msg_format & MSG_FORMAT_SYSLOG, + printk_time); +- console_seq++; ++ atomic64_set(&console_seq, seq + 1); + + /* + * While actively printing out messages, if another printk() +@@ -2699,7 +2703,7 @@ void console_unlock(void) + * there's a new owner and the console_unlock() from them will do the + * flush, no worries. + */ +- retry = prb_read_valid(prb, console_seq, NULL); ++ retry = prb_read_valid(prb, atomic64_read(&console_seq), NULL); + if (retry && console_trylock()) + goto again; + } +@@ -2762,7 +2766,7 @@ void console_flush_on_panic(enum con_flu + console_may_schedule = 0; + + if (mode == CONSOLE_REPLAY_ALL) +- console_seq = prb_first_valid_seq(prb); ++ atomic64_set(&console_seq, prb_first_valid_seq(prb)); + console_unlock(); + } + +@@ -2999,11 +3003,11 @@ void register_console(struct console *ne + * ignores console_lock. + */ + exclusive_console = newcon; +- exclusive_console_stop_seq = console_seq; ++ exclusive_console_stop_seq = atomic64_read(&console_seq); + + /* Get a consistent copy of @syslog_seq. */ + spin_lock_irqsave(&syslog_lock, flags); +- console_seq = syslog_seq; ++ atomic64_set(&console_seq, syslog_seq); + spin_unlock_irqrestore(&syslog_lock, flags); + } + console_unlock(); diff --git a/kernel/patches-5.11.x-rt/0043-0024-printk-introduce-kernel-sync-mode.patch b/kernel/patches-5.11.x-rt/0043-0024-printk-introduce-kernel-sync-mode.patch new file mode 100644 index 000000000..6adbd65db --- /dev/null +++ b/kernel/patches-5.11.x-rt/0043-0024-printk-introduce-kernel-sync-mode.patch @@ -0,0 +1,298 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:06 +0106 +Subject: [PATCH 24/28] printk: introduce kernel sync mode + +When the kernel performs an OOPS, enter into "sync mode": + +- only atomic consoles (write_atomic() callback) will print +- printing occurs within vprintk_store() instead of console_unlock() + +CONSOLE_LOG_MAX is moved to printk.h to support the per-console +buffer used in sync mode. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 4 + + include/linux/printk.h | 6 ++ + kernel/printk/printk.c | 133 +++++++++++++++++++++++++++++++++++++++++++++--- + 3 files changed, 137 insertions(+), 6 deletions(-) + +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,6 +16,7 @@ + + #include + #include ++#include + + struct vc_data; + struct console_font_op; +@@ -150,6 +151,9 @@ struct console { + short flags; + short index; + int cflag; ++#ifdef CONFIG_PRINTK ++ char sync_buf[CONSOLE_LOG_MAX]; ++#endif + void *data; + struct console *next; + }; +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -46,6 +46,12 @@ static inline const char *printk_skip_he + + #define CONSOLE_EXT_LOG_MAX 8192 + ++/* ++ * The maximum size of a record formatted for console printing ++ * (i.e. with the prefix prepended to every line). ++ */ ++#define CONSOLE_LOG_MAX 4096 ++ + /* printk's without a loglevel use this.. */ + #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT + +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -359,6 +360,9 @@ enum log_flags { + static DEFINE_SPINLOCK(syslog_lock); + + #ifdef CONFIG_PRINTK ++/* Set to enable sync mode. 
Once set, it is never cleared. */ ++static bool sync_mode; ++ + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ +@@ -398,9 +402,6 @@ static struct latched_seq clear_seq = { + /* the maximum size allowed to be reserved for a record */ + #define LOG_LINE_MAX (1024 - PREFIX_MAX) + +-/* the maximum size of a formatted record (i.e. with prefix added per line) */ +-#define CONSOLE_LOG_MAX 4096 +- + #define LOG_LEVEL(v) ((v) & 0x07) + #define LOG_FACILITY(v) ((v) >> 3 & 0xff) + +@@ -1742,6 +1743,91 @@ static inline void printk_delay(int leve + } + } + ++static bool kernel_sync_mode(void) ++{ ++ return (oops_in_progress || sync_mode); ++} ++ ++static bool console_can_sync(struct console *con) ++{ ++ if (!(con->flags & CON_ENABLED)) ++ return false; ++ if (con->write_atomic && kernel_sync_mode()) ++ return true; ++ return false; ++} ++ ++static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len) ++{ ++ if (!(con->flags & CON_ENABLED)) ++ return false; ++ if (con->write_atomic && kernel_sync_mode()) ++ con->write_atomic(con, text, text_len); ++ else ++ return false; ++ ++ return true; ++} ++ ++static bool have_atomic_console(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) { ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ if (con->write_atomic) ++ return true; ++ } ++ return false; ++} ++ ++static bool print_sync(struct console *con, u64 *seq) ++{ ++ struct printk_info info; ++ struct printk_record r; ++ size_t text_len; ++ ++ prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf)); ++ ++ if (!prb_read_valid(prb, *seq, &r)) ++ return false; ++ ++ text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); ++ ++ if (!call_sync_console_driver(con, &con->sync_buf[0], text_len)) ++ return false; ++ ++ *seq = r.info->seq; ++ ++ touch_softlockup_watchdog_sync(); ++ clocksource_touch_watchdog(); ++ rcu_cpu_stall_reset(); ++ touch_nmi_watchdog(); ++ ++ if (text_len) ++ printk_delay(r.info->level); ++ ++ return true; ++} ++ ++static void print_sync_until(struct console *con, u64 seq) ++{ ++ unsigned int flags; ++ u64 printk_seq; ++ ++ console_atomic_lock(&flags); ++ for (;;) { ++ printk_seq = atomic64_read(&console_seq); ++ if (printk_seq >= seq) ++ break; ++ if (!print_sync(con, &printk_seq)) ++ break; ++ atomic64_set(&console_seq, printk_seq + 1); ++ } ++ console_atomic_unlock(flags); ++} ++ + /* + * Special console_lock variants that help to reduce the risk of soft-lockups. + * They allow to pass console_lock to another printk() call using a busy wait. 
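For a console driver, taking part in sync mode only requires providing the
write_atomic() callback added earlier in this series. A hypothetical
minimal implementation for a polled, memory-mapped UART might look like
this (my_write_atomic() and my_uart_putc() are illustrative names, not
part of the patch):

  static void my_write_atomic(struct console *con,
                              const char *s, unsigned int count)
  {
      unsigned int flags;
      unsigned int i;

      /* The cpulock keeps NMI-context callers from interleaving. */
      console_atomic_lock(&flags);
      for (i = 0; i < count; i++)
          my_uart_putc(s[i]);  /* hypothetical: poll FIFO, emit char */
      console_atomic_unlock(flags);
  }

print_sync() then feeds such a console one record at a time, touching the
softlockup, clocksource, RCU stall and NMI watchdogs between records so
that a long synchronous flush does not trigger spurious stall reports.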
+@@ -1916,6 +2002,8 @@ static void call_console_drivers(const c + if (!cpu_online(smp_processor_id()) && + !(con->flags & CON_ANYTIME)) + continue; ++ if (kernel_sync_mode()) ++ continue; + if (con->flags & CON_EXTENDED) + con->write(con, ext_text, ext_len); + else { +@@ -2070,6 +2158,7 @@ int vprintk_store(int facility, int leve + const u32 caller_id = printk_caller_id(); + struct prb_reserved_entry e; + enum log_flags lflags = 0; ++ bool final_commit = false; + struct printk_record r; + unsigned long irqflags; + u16 trunc_msg_len = 0; +@@ -2079,6 +2168,7 @@ int vprintk_store(int facility, int leve + u16 text_len; + int ret = 0; + u64 ts_nsec; ++ u64 seq; + + /* + * Since the duration of printk() can vary depending on the message +@@ -2117,6 +2207,7 @@ int vprintk_store(int facility, int leve + if (lflags & LOG_CONT) { + prb_rec_init_wr(&r, reserve_size); + if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { ++ seq = r.info->seq; + text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size, + facility, &lflags, fmt, args); + r.info->text_len += text_len; +@@ -2124,6 +2215,7 @@ int vprintk_store(int facility, int leve + if (lflags & LOG_NEWLINE) { + r.info->flags |= LOG_NEWLINE; + prb_final_commit(&e); ++ final_commit = true; + } else { + prb_commit(&e); + } +@@ -2148,6 +2240,8 @@ int vprintk_store(int facility, int leve + goto out; + } + ++ seq = r.info->seq; ++ + /* fill message */ + text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args); + if (trunc_msg_len) +@@ -2162,13 +2256,25 @@ int vprintk_store(int facility, int leve + memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); + + /* A message without a trailing newline can be continued. */ +- if (!(lflags & LOG_NEWLINE)) ++ if (!(lflags & LOG_NEWLINE)) { + prb_commit(&e); +- else ++ } else { + prb_final_commit(&e); ++ final_commit = true; ++ } + + ret = text_len + trunc_msg_len; + out: ++ /* only the kernel may perform synchronous printing */ ++ if (facility == 0 && final_commit) { ++ struct console *con; ++ ++ for_each_console(con) { ++ if (console_can_sync(con)) ++ print_sync_until(con, seq + 1); ++ } ++ } ++ + printk_exit_irqrestore(irqflags); + return ret; + } +@@ -2264,12 +2370,13 @@ EXPORT_SYMBOL(printk); + + #else /* CONFIG_PRINTK */ + +-#define CONSOLE_LOG_MAX 0 + #define printk_time false + + #define prb_read_valid(rb, seq, r) false + #define prb_first_valid_seq(rb) 0 + ++#define kernel_sync_mode() false ++ + static u64 syslog_seq; + static atomic64_t console_seq = ATOMIC64_INIT(0); + static u64 exclusive_console_stop_seq; +@@ -2562,6 +2669,8 @@ static int have_callable_console(void) + */ + static inline int can_use_console(void) + { ++ if (kernel_sync_mode()) ++ return false; + return cpu_online(raw_smp_processor_id()) || have_callable_console(); + } + +@@ -3374,6 +3483,18 @@ void kmsg_dump(enum kmsg_dump_reason rea + struct kmsg_dumper_iter iter; + struct kmsg_dumper *dumper; + ++ if (!oops_in_progress) { ++ /* ++ * If atomic consoles are available, activate kernel sync mode ++ * to make sure any final messages are visible. The trailing ++ * printk message is important to flush any pending messages. 
++ */ ++ if (have_atomic_console()) { ++ sync_mode = true; ++ pr_info("enabled sync mode\n"); ++ } ++ } ++ + rcu_read_lock(); + list_for_each_entry_rcu(dumper, &dump_list, list) { + enum kmsg_dump_reason max_reason = dumper->max_reason; diff --git a/kernel/patches-5.11.x-rt/0044-0025-printk-move-console-printing-to-kthreads.patch b/kernel/patches-5.11.x-rt/0044-0025-printk-move-console-printing-to-kthreads.patch new file mode 100644 index 000000000..29db3816e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0044-0025-printk-move-console-printing-to-kthreads.patch @@ -0,0 +1,838 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:07 +0106 +Subject: [PATCH 25/28] printk: move console printing to kthreads + +Create a kthread for each console to perform console printing. Now +all console printing is fully asynchronous except for the boot +console and when the kernel enters sync mode (and there are atomic +consoles available). + +The console_lock() and console_unlock() functions now only do what +their name says... locking and unlocking of the console. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 2 + kernel/printk/printk.c | 625 ++++++++++++++---------------------------------- + 2 files changed, 186 insertions(+), 441 deletions(-) + +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -154,6 +154,8 @@ struct console { + #ifdef CONFIG_PRINTK + char sync_buf[CONSOLE_LOG_MAX]; + #endif ++ atomic64_t printk_seq; ++ struct task_struct *thread; + void *data; + struct console *next; + }; +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -268,11 +269,6 @@ static void __up_console_sem(unsigned lo + static int console_locked, console_suspended; + + /* +- * If exclusive_console is non-NULL then only this console is to be printed to. +- */ +-static struct console *exclusive_console; +- +-/* + * Array of consoles built from command line options (console=) + */ + +@@ -356,10 +352,10 @@ enum log_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + ++#ifdef CONFIG_PRINTK + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_SPINLOCK(syslog_lock); + +-#ifdef CONFIG_PRINTK + /* Set to enable sync mode. Once set, it is never cleared. */ + static bool sync_mode; + +@@ -370,13 +366,6 @@ static u64 syslog_seq; + static size_t syslog_partial; + static bool syslog_time; + +-/* Both protected by @console_sem. 
*/ +-static u64 exclusive_console_stop_seq; +-static unsigned long console_dropped; +- +-/* the next printk record to write to the console */ +-static atomic64_t console_seq = ATOMIC64_INIT(0); +- + struct latched_seq { + seqcount_latch_t latch; + u64 val[2]; +@@ -1754,6 +1743,8 @@ static bool console_can_sync(struct cons + return false; + if (con->write_atomic && kernel_sync_mode()) + return true; ++ if (con->write && (con->flags & CON_BOOT) && !con->thread) ++ return true; + return false; + } + +@@ -1763,6 +1754,8 @@ static bool call_sync_console_driver(str + return false; + if (con->write_atomic && kernel_sync_mode()) + con->write_atomic(con, text, text_len); ++ else if (con->write && (con->flags & CON_BOOT) && !con->thread) ++ con->write(con, text, text_len); + else + return false; + +@@ -1818,202 +1811,16 @@ static void print_sync_until(struct cons + + console_atomic_lock(&flags); + for (;;) { +- printk_seq = atomic64_read(&console_seq); ++ printk_seq = atomic64_read(&con->printk_seq); + if (printk_seq >= seq) + break; + if (!print_sync(con, &printk_seq)) + break; +- atomic64_set(&console_seq, printk_seq + 1); ++ atomic64_set(&con->printk_seq, printk_seq + 1); + } + console_atomic_unlock(flags); + } + +-/* +- * Special console_lock variants that help to reduce the risk of soft-lockups. +- * They allow to pass console_lock to another printk() call using a busy wait. +- */ +- +-#ifdef CONFIG_LOCKDEP +-static struct lockdep_map console_owner_dep_map = { +- .name = "console_owner" +-}; +-#endif +- +-static DEFINE_RAW_SPINLOCK(console_owner_lock); +-static struct task_struct *console_owner; +-static bool console_waiter; +- +-/** +- * console_lock_spinning_enable - mark beginning of code where another +- * thread might safely busy wait +- * +- * This basically converts console_lock into a spinlock. This marks +- * the section where the console_lock owner can not sleep, because +- * there may be a waiter spinning (like a spinlock). Also it must be +- * ready to hand over the lock at the end of the section. +- */ +-static void console_lock_spinning_enable(void) +-{ +- raw_spin_lock(&console_owner_lock); +- console_owner = current; +- raw_spin_unlock(&console_owner_lock); +- +- /* The waiter may spin on us after setting console_owner */ +- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); +-} +- +-/** +- * console_lock_spinning_disable_and_check - mark end of code where another +- * thread was able to busy wait and check if there is a waiter +- * +- * This is called at the end of the section where spinning is allowed. +- * It has two functions. First, it is a signal that it is no longer +- * safe to start busy waiting for the lock. Second, it checks if +- * there is a busy waiter and passes the lock rights to her. +- * +- * Important: Callers lose the lock if there was a busy waiter. +- * They must not touch items synchronized by console_lock +- * in this case. +- * +- * Return: 1 if the lock rights were passed, 0 otherwise. +- */ +-static int console_lock_spinning_disable_and_check(void) +-{ +- int waiter; +- +- raw_spin_lock(&console_owner_lock); +- waiter = READ_ONCE(console_waiter); +- console_owner = NULL; +- raw_spin_unlock(&console_owner_lock); +- +- if (!waiter) { +- spin_release(&console_owner_dep_map, _THIS_IP_); +- return 0; +- } +- +- /* The waiter is now free to continue */ +- WRITE_ONCE(console_waiter, false); +- +- spin_release(&console_owner_dep_map, _THIS_IP_); +- +- /* +- * Hand off console_lock to waiter. The waiter will perform +- * the up(). 
After this, the waiter is the console_lock owner. +- */ +- mutex_release(&console_lock_dep_map, _THIS_IP_); +- return 1; +-} +- +-/** +- * console_trylock_spinning - try to get console_lock by busy waiting +- * +- * This allows to busy wait for the console_lock when the current +- * owner is running in specially marked sections. It means that +- * the current owner is running and cannot reschedule until it +- * is ready to lose the lock. +- * +- * Return: 1 if we got the lock, 0 othrewise +- */ +-static int console_trylock_spinning(void) +-{ +- struct task_struct *owner = NULL; +- bool waiter; +- bool spin = false; +- unsigned long flags; +- +- if (console_trylock()) +- return 1; +- +- printk_safe_enter_irqsave(flags); +- +- raw_spin_lock(&console_owner_lock); +- owner = READ_ONCE(console_owner); +- waiter = READ_ONCE(console_waiter); +- if (!waiter && owner && owner != current) { +- WRITE_ONCE(console_waiter, true); +- spin = true; +- } +- raw_spin_unlock(&console_owner_lock); +- +- /* +- * If there is an active printk() writing to the +- * consoles, instead of having it write our data too, +- * see if we can offload that load from the active +- * printer, and do some printing ourselves. +- * Go into a spin only if there isn't already a waiter +- * spinning, and there is an active printer, and +- * that active printer isn't us (recursive printk?). +- */ +- if (!spin) { +- printk_safe_exit_irqrestore(flags); +- return 0; +- } +- +- /* We spin waiting for the owner to release us */ +- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); +- /* Owner will clear console_waiter on hand off */ +- while (READ_ONCE(console_waiter)) +- cpu_relax(); +- spin_release(&console_owner_dep_map, _THIS_IP_); +- +- printk_safe_exit_irqrestore(flags); +- /* +- * The owner passed the console lock to us. +- * Since we did not spin on console lock, annotate +- * this as a trylock. Otherwise lockdep will +- * complain. +- */ +- mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); +- +- return 1; +-} +- +-/* +- * Call the console drivers, asking them to write out +- * log_buf[start] to log_buf[end - 1]. +- * The console_lock must be held. 
+- */ +-static void call_console_drivers(const char *ext_text, size_t ext_len, +- const char *text, size_t len) +-{ +- static char dropped_text[64]; +- size_t dropped_len = 0; +- struct console *con; +- +- trace_console_rcuidle(text, len); +- +- if (!console_drivers) +- return; +- +- if (console_dropped) { +- dropped_len = snprintf(dropped_text, sizeof(dropped_text), +- "** %lu printk messages dropped **\n", +- console_dropped); +- console_dropped = 0; +- } +- +- for_each_console(con) { +- if (exclusive_console && con != exclusive_console) +- continue; +- if (!(con->flags & CON_ENABLED)) +- continue; +- if (!con->write) +- continue; +- if (!cpu_online(smp_processor_id()) && +- !(con->flags & CON_ANYTIME)) +- continue; +- if (kernel_sync_mode()) +- continue; +- if (con->flags & CON_EXTENDED) +- con->write(con, ext_text, ext_len); +- else { +- if (dropped_len) +- con->write(con, dropped_text, dropped_len); +- con->write(con, text, len); +- } +- } +-} +- + #ifdef CONFIG_PRINTK_NMI + #define NUM_RECURSION_CTX 2 + #else +@@ -2284,39 +2091,16 @@ asmlinkage int vprintk_emit(int facility + const char *fmt, va_list args) + { + int printed_len; +- bool in_sched = false; + + /* Suppress unimportant messages after panic happens */ + if (unlikely(suppress_printk)) + return 0; + +- if (level == LOGLEVEL_SCHED) { ++ if (level == LOGLEVEL_SCHED) + level = LOGLEVEL_DEFAULT; +- in_sched = true; +- } +- +- printk_delay(level); + + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + +- /* If called from the scheduler, we can not call up(). */ +- if (!in_sched) { +- /* +- * Disable preemption to avoid being preempted while holding +- * console_sem which would prevent anyone from printing to +- * console +- */ +- preempt_disable(); +- /* +- * Try to acquire and then immediately release the console +- * semaphore. The release will print out buffers and wake up +- * /dev/kmsg and syslog() users. 
+- */ +- if (console_trylock_spinning()) +- console_unlock(); +- preempt_enable(); +- } +- + wake_up_klogd(); + return printed_len; + } +@@ -2368,38 +2152,158 @@ asmlinkage __visible int printk(const ch + } + EXPORT_SYMBOL(printk); + +-#else /* CONFIG_PRINTK */ ++static int printk_kthread_func(void *data) ++{ ++ struct console *con = data; ++ unsigned long dropped = 0; ++ char *dropped_text = NULL; ++ struct printk_info info; ++ struct printk_record r; ++ char *ext_text = NULL; ++ size_t dropped_len; ++ int ret = -ENOMEM; ++ char *text = NULL; ++ char *write_text; ++ u64 printk_seq; ++ size_t len; ++ int error; ++ u64 seq; + +-#define printk_time false ++ if (con->flags & CON_EXTENDED) { ++ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); ++ if (!ext_text) ++ goto out; ++ } ++ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ++ dropped_text = kmalloc(64, GFP_KERNEL); ++ if (!text || !dropped_text) ++ goto out; + +-#define prb_read_valid(rb, seq, r) false +-#define prb_first_valid_seq(rb) 0 ++ if (con->flags & CON_EXTENDED) ++ write_text = ext_text; ++ else ++ write_text = text; + +-#define kernel_sync_mode() false ++ seq = atomic64_read(&con->printk_seq); + +-static u64 syslog_seq; +-static atomic64_t console_seq = ATOMIC64_INIT(0); +-static u64 exclusive_console_stop_seq; +-static unsigned long console_dropped; ++ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); ++ ++ for (;;) { ++ error = wait_event_interruptible(log_wait, ++ prb_read_valid(prb, seq, &r) || kthread_should_stop()); ++ ++ if (kthread_should_stop()) ++ break; ++ ++ if (error) ++ continue; ++ ++ if (seq != r.info->seq) { ++ dropped += r.info->seq - seq; ++ seq = r.info->seq; ++ } ++ ++ seq++; ++ ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ ++ if (suppress_message_printing(r.info->level)) ++ continue; ++ ++ if (con->flags & CON_EXTENDED) { ++ len = info_print_ext_header(ext_text, ++ CONSOLE_EXT_LOG_MAX, ++ r.info); ++ len += msg_print_ext_body(ext_text + len, ++ CONSOLE_EXT_LOG_MAX - len, ++ &r.text_buf[0], r.info->text_len, ++ &r.info->dev_info); ++ } else { ++ len = record_print_text(&r, ++ console_msg_format & MSG_FORMAT_SYSLOG, ++ printk_time); ++ } ++ ++ printk_seq = atomic64_read(&con->printk_seq); ++ ++ console_lock(); ++ console_may_schedule = 0; + +-static size_t record_print_text(const struct printk_record *r, +- bool syslog, bool time) ++ if (kernel_sync_mode() && con->write_atomic) { ++ console_unlock(); ++ break; ++ } ++ ++ if (!(con->flags & CON_EXTENDED) && dropped) { ++ dropped_len = snprintf(dropped_text, 64, ++ "** %lu printk messages dropped **\n", ++ dropped); ++ dropped = 0; ++ ++ con->write(con, dropped_text, dropped_len); ++ printk_delay(r.info->level); ++ } ++ ++ con->write(con, write_text, len); ++ if (len) ++ printk_delay(r.info->level); ++ ++ atomic64_cmpxchg_relaxed(&con->printk_seq, printk_seq, seq); ++ ++ console_unlock(); ++ } ++out: ++ kfree(dropped_text); ++ kfree(text); ++ kfree(ext_text); ++ pr_info("%sconsole [%s%d]: printing thread stopped\n", ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); ++ return ret; ++} ++ ++/* Must be called within console_lock(). */ ++static void start_printk_kthread(struct console *con) + { +- return 0; ++ con->thread = kthread_run(printk_kthread_func, con, ++ "pr/%s%d", con->name, con->index); ++ if (IS_ERR(con->thread)) { ++ pr_err("%sconsole [%s%d]: unable to start printing thread\n", ++ (con->flags & CON_BOOT) ? 
"boot" : "", ++ con->name, con->index); ++ return; ++ } ++ pr_info("%sconsole [%s%d]: printing thread started\n", ++ (con->flags & CON_BOOT) ? "boot" : "", ++ con->name, con->index); + } +-static ssize_t info_print_ext_header(char *buf, size_t size, +- struct printk_info *info) ++ ++/* protected by console_lock */ ++static bool kthreads_started; ++ ++/* Must be called within console_lock(). */ ++static void console_try_thread(struct console *con) + { +- return 0; ++ if (kthreads_started) { ++ start_printk_kthread(con); ++ return; ++ } ++ ++ /* ++ * The printing threads have not been started yet. If this console ++ * can print synchronously, print all unprinted messages. ++ */ ++ if (console_can_sync(con)) ++ print_sync_until(con, prb_next_seq(prb)); + } +-static ssize_t msg_print_ext_body(char *buf, size_t size, +- char *text, size_t text_len, +- struct dev_printk_info *dev_info) { return 0; } +-static void console_lock_spinning_enable(void) { } +-static int console_lock_spinning_disable_and_check(void) { return 0; } +-static void call_console_drivers(const char *ext_text, size_t ext_len, +- const char *text, size_t len) {} +-static bool suppress_message_printing(int level) { return false; } ++ ++#else /* CONFIG_PRINTK */ ++ ++#define prb_first_valid_seq(rb) 0 ++#define prb_next_seq(rb) 0 ++ ++#define console_try_thread(con) + + #endif /* CONFIG_PRINTK */ + +@@ -2644,36 +2548,6 @@ int is_console_locked(void) + } + EXPORT_SYMBOL(is_console_locked); + +-/* +- * Check if we have any console that is capable of printing while cpu is +- * booting or shutting down. Requires console_sem. +- */ +-static int have_callable_console(void) +-{ +- struct console *con; +- +- for_each_console(con) +- if ((con->flags & CON_ENABLED) && +- (con->flags & CON_ANYTIME)) +- return 1; +- +- return 0; +-} +- +-/* +- * Can we actually use the console at this time on this cpu? +- * +- * Console drivers may assume that per-cpu resources have been allocated. So +- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't +- * call them until this CPU is officially up. +- */ +-static inline int can_use_console(void) +-{ +- if (kernel_sync_mode()) +- return false; +- return cpu_online(raw_smp_processor_id()) || have_callable_console(); +-} +- + /** + * console_unlock - unlock the console system + * +@@ -2690,131 +2564,14 @@ static inline int can_use_console(void) + */ + void console_unlock(void) + { +- static char ext_text[CONSOLE_EXT_LOG_MAX]; +- static char text[CONSOLE_LOG_MAX]; +- bool do_cond_resched, retry; +- struct printk_info info; +- struct printk_record r; +- u64 seq; +- + if (console_suspended) { + up_console_sem(); + return; + } + +- prb_rec_init_rd(&r, &info, text, sizeof(text)); +- +- /* +- * Console drivers are called with interrupts disabled, so +- * @console_may_schedule should be cleared before; however, we may +- * end up dumping a lot of lines, for example, if called from +- * console registration path, and should invoke cond_resched() +- * between lines if allowable. Not doing so can cause a very long +- * scheduling stall on a slow console leading to RCU stall and +- * softlockup warnings which exacerbate the issue with more +- * messages practically incapacitating the system. +- * +- * console_trylock() is not able to detect the preemptive +- * context reliably. Therefore the value must be stored before +- * and cleared after the "again" goto label. 
+- */ +- do_cond_resched = console_may_schedule; +-again: +- console_may_schedule = 0; +- +- /* +- * We released the console_sem lock, so we need to recheck if +- * cpu is online and (if not) is there at least one CON_ANYTIME +- * console. +- */ +- if (!can_use_console()) { +- console_locked = 0; +- up_console_sem(); +- return; +- } +- +- for (;;) { +- size_t ext_len = 0; +- size_t len; +- +-skip: +- seq = atomic64_read(&console_seq); +- if (!prb_read_valid(prb, seq, &r)) +- break; +- +- if (seq != r.info->seq) { +- console_dropped += r.info->seq - seq; +- atomic64_set(&console_seq, r.info->seq); +- seq = r.info->seq; +- } +- +- if (suppress_message_printing(r.info->level)) { +- /* +- * Skip record we have buffered and already printed +- * directly to the console when we received it, and +- * record that has level above the console loglevel. +- */ +- atomic64_set(&console_seq, seq + 1); +- goto skip; +- } +- +- /* Output to all consoles once old messages replayed. */ +- if (unlikely(exclusive_console && +- seq >= exclusive_console_stop_seq)) { +- exclusive_console = NULL; +- } +- +- /* +- * Handle extended console text first because later +- * record_print_text() will modify the record buffer in-place. +- */ +- if (nr_ext_console_drivers) { +- ext_len = info_print_ext_header(ext_text, +- sizeof(ext_text), +- r.info); +- ext_len += msg_print_ext_body(ext_text + ext_len, +- sizeof(ext_text) - ext_len, +- &r.text_buf[0], +- r.info->text_len, +- &r.info->dev_info); +- } +- len = record_print_text(&r, +- console_msg_format & MSG_FORMAT_SYSLOG, +- printk_time); +- atomic64_set(&console_seq, seq + 1); +- +- /* +- * While actively printing out messages, if another printk() +- * were to occur on another CPU, it may wait for this one to +- * finish. This task can not be preempted if there is a +- * waiter waiting to take over. +- */ +- console_lock_spinning_enable(); +- +- stop_critical_timings(); /* don't trace print latency */ +- call_console_drivers(ext_text, ext_len, text, len); +- start_critical_timings(); +- +- if (console_lock_spinning_disable_and_check()) +- return; +- +- if (do_cond_resched) +- cond_resched(); +- } +- + console_locked = 0; + + up_console_sem(); +- +- /* +- * Someone could have filled up the buffer again, so re-check if there's +- * something to flush. In case we cannot trylock the console_sem again, +- * there's a new owner and the console_unlock() from them will do the +- * flush, no worries. +- */ +- retry = prb_read_valid(prb, atomic64_read(&console_seq), NULL); +- if (retry && console_trylock()) +- goto again; + } + EXPORT_SYMBOL(console_unlock); + +@@ -2864,18 +2621,20 @@ void console_unblank(void) + */ + void console_flush_on_panic(enum con_flush_mode mode) + { +- /* +- * If someone else is holding the console lock, trylock will fail +- * and may_schedule may be set. Ignore and proceed to unlock so +- * that messages are flushed out. As this can be called from any +- * context and we don't want to get preempted while flushing, +- * ensure may_schedule is cleared. 
+- */ +- console_trylock(); ++ struct console *c; ++ u64 seq; ++ ++ if (!console_trylock()) ++ return; ++ + console_may_schedule = 0; + +- if (mode == CONSOLE_REPLAY_ALL) +- atomic64_set(&console_seq, prb_first_valid_seq(prb)); ++ if (mode == CONSOLE_REPLAY_ALL) { ++ seq = prb_first_valid_seq(prb); ++ for_each_console(c) ++ atomic64_set(&c->printk_seq, seq); ++ } ++ + console_unlock(); + } + +@@ -3010,7 +2769,6 @@ static int try_enable_new_console(struct + */ + void register_console(struct console *newcon) + { +- unsigned long flags; + struct console *bcon = NULL; + int err; + +@@ -3034,6 +2792,8 @@ void register_console(struct console *ne + } + } + ++ newcon->thread = NULL; ++ + if (console_drivers && console_drivers->flags & CON_BOOT) + bcon = console_drivers; + +@@ -3098,27 +2858,12 @@ void register_console(struct console *ne + if (newcon->flags & CON_EXTENDED) + nr_ext_console_drivers++; + +- if (newcon->flags & CON_PRINTBUFFER) { +- /* +- * console_unlock(); will print out the buffered messages +- * for us. +- * +- * We're about to replay the log buffer. Only do this to the +- * just-registered console to avoid excessive message spam to +- * the already-registered consoles. +- * +- * Set exclusive_console with disabled interrupts to reduce +- * race window with eventual console_flush_on_panic() that +- * ignores console_lock. +- */ +- exclusive_console = newcon; +- exclusive_console_stop_seq = atomic64_read(&console_seq); ++ if (newcon->flags & CON_PRINTBUFFER) ++ atomic64_set(&newcon->printk_seq, 0); ++ else ++ atomic64_set(&newcon->printk_seq, prb_next_seq(prb)); + +- /* Get a consistent copy of @syslog_seq. */ +- spin_lock_irqsave(&syslog_lock, flags); +- atomic64_set(&console_seq, syslog_seq); +- spin_unlock_irqrestore(&syslog_lock, flags); +- } ++ console_try_thread(newcon); + console_unlock(); + console_sysfs_notify(); + +@@ -3192,6 +2937,9 @@ int unregister_console(struct console *c + console_unlock(); + console_sysfs_notify(); + ++ if (console->thread && !IS_ERR(console->thread)) ++ kthread_stop(console->thread); ++ + if (console->exit) + res = console->exit(console); + +@@ -3274,6 +3022,15 @@ static int __init printk_late_init(void) + unregister_console(con); + } + } ++ ++#ifdef CONFIG_PRINTK ++ console_lock(); ++ for_each_console(con) ++ start_printk_kthread(con); ++ kthreads_started = true; ++ console_unlock(); ++#endif ++ + ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, + console_cpu_notify); + WARN_ON(ret < 0); +@@ -3289,7 +3046,6 @@ late_initcall(printk_late_init); + * Delayed printk version, for scheduler-internal messages: + */ + #define PRINTK_PENDING_WAKEUP 0x01 +-#define PRINTK_PENDING_OUTPUT 0x02 + + static DEFINE_PER_CPU(int, printk_pending); + +@@ -3297,14 +3053,8 @@ static void wake_up_klogd_work_func(stru + { + int pending = __this_cpu_xchg(printk_pending, 0); + +- if (pending & PRINTK_PENDING_OUTPUT) { +- /* If trylock fails, someone else is doing the printing */ +- if (console_trylock()) +- console_unlock(); +- } +- + if (pending & PRINTK_PENDING_WAKEUP) +- wake_up_interruptible(&log_wait); ++ wake_up_interruptible_all(&log_wait); + } + + static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = +@@ -3325,13 +3075,6 @@ void wake_up_klogd(void) + + void defer_console_output(void) + { +- if (!printk_percpu_data_ready()) +- return; +- +- preempt_disable(); +- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); +- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); +- preempt_enable(); + } + + int vprintk_deferred(const char *fmt, 
va_list args) diff --git a/kernel/patches-5.11.x-rt/0045-0026-printk-remove-deferred-printing.patch b/kernel/patches-5.11.x-rt/0045-0026-printk-remove-deferred-printing.patch new file mode 100644 index 000000000..d9f0b8601 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0045-0026-printk-remove-deferred-printing.patch @@ -0,0 +1,407 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:08 +0106 +Subject: [PATCH 26/28] printk: remove deferred printing + +Since printing occurs either atomically or from the printing +kthread, there is no need for any deferring or tracking possible +recursion paths. Remove all printk context tracking. + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm/kernel/smp.c | 2 - + arch/powerpc/kexec/crash.c | 3 - + include/linux/hardirq.h | 2 - + include/linux/printk.h | 12 ------ + kernel/printk/Makefile | 1 + kernel/printk/internal.h | 70 ----------------------------------- + kernel/printk/printk.c | 58 +++++++++++------------------ + kernel/printk/printk_safe.c | 88 -------------------------------------------- + kernel/trace/trace.c | 2 - + 9 files changed, 22 insertions(+), 216 deletions(-) + delete mode 100644 kernel/printk/internal.h + delete mode 100644 kernel/printk/printk_safe.c + +--- a/arch/arm/kernel/smp.c ++++ b/arch/arm/kernel/smp.c +@@ -671,9 +671,7 @@ static void do_handle_IPI(int ipinr) + break; + + case IPI_CPU_BACKTRACE: +- printk_nmi_enter(); + nmi_cpu_backtrace(get_irq_regs()); +- printk_nmi_exit(); + break; + + default: +--- a/arch/powerpc/kexec/crash.c ++++ b/arch/powerpc/kexec/crash.c +@@ -311,9 +311,6 @@ void default_machine_crash_shutdown(stru + unsigned int i; + int (*old_handler)(struct pt_regs *regs); + +- /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ +- printk_nmi_enter(); +- + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. +--- a/include/linux/hardirq.h ++++ b/include/linux/hardirq.h +@@ -115,7 +115,6 @@ extern void rcu_nmi_exit(void); + do { \ + lockdep_off(); \ + arch_nmi_enter(); \ +- printk_nmi_enter(); \ + BUG_ON(in_nmi() == NMI_MASK); \ + __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ + } while (0) +@@ -134,7 +133,6 @@ extern void rcu_nmi_exit(void); + do { \ + BUG_ON(!in_nmi()); \ + __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ +- printk_nmi_exit(); \ + arch_nmi_exit(); \ + lockdep_on(); \ + } while (0) +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -155,18 +155,6 @@ static inline __printf(1, 2) __cold + void early_printk(const char *s, ...) 
{ } + #endif + +-#ifdef CONFIG_PRINTK_NMI +-extern void printk_nmi_enter(void); +-extern void printk_nmi_exit(void); +-extern void printk_nmi_direct_enter(void); +-extern void printk_nmi_direct_exit(void); +-#else +-static inline void printk_nmi_enter(void) { } +-static inline void printk_nmi_exit(void) { } +-static inline void printk_nmi_direct_enter(void) { } +-static inline void printk_nmi_direct_exit(void) { } +-#endif /* PRINTK_NMI */ +- + struct dev_printk_info; + + #ifdef CONFIG_PRINTK +--- a/kernel/printk/Makefile ++++ b/kernel/printk/Makefile +@@ -1,5 +1,4 @@ + # SPDX-License-Identifier: GPL-2.0-only + obj-y = printk.o +-obj-$(CONFIG_PRINTK) += printk_safe.o + obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o + obj-$(CONFIG_PRINTK) += printk_ringbuffer.o +--- a/kernel/printk/internal.h ++++ /dev/null +@@ -1,70 +0,0 @@ +-/* SPDX-License-Identifier: GPL-2.0-or-later */ +-/* +- * internal.h - printk internal definitions +- */ +-#include +- +-#ifdef CONFIG_PRINTK +- +-#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff +-#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000 +-#define PRINTK_NMI_CONTEXT_MASK 0xff0000000 +- +-#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000 +- +-__printf(4, 0) +-int vprintk_store(int facility, int level, +- const struct dev_printk_info *dev_info, +- const char *fmt, va_list args); +- +-__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +-__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); +-__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +-void __printk_safe_enter(void); +-void __printk_safe_exit(void); +- +-bool printk_percpu_data_ready(void); +- +-#define printk_safe_enter_irqsave(flags) \ +- do { \ +- local_irq_save(flags); \ +- __printk_safe_enter(); \ +- } while (0) +- +-#define printk_safe_exit_irqrestore(flags) \ +- do { \ +- __printk_safe_exit(); \ +- local_irq_restore(flags); \ +- } while (0) +- +-#define printk_safe_enter_irq() \ +- do { \ +- local_irq_disable(); \ +- __printk_safe_enter(); \ +- } while (0) +- +-#define printk_safe_exit_irq() \ +- do { \ +- __printk_safe_exit(); \ +- local_irq_enable(); \ +- } while (0) +- +-void defer_console_output(void); +- +-#else +- +-__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } +- +-/* +- * In !PRINTK builds we still export console_sem +- * semaphore and some of console functions (console_unlock()/etc.), so +- * printk-safe must preserve the existing local IRQ guarantees. +- */ +-#define printk_safe_enter_irqsave(flags) local_irq_save(flags) +-#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) +- +-#define printk_safe_enter_irq() local_irq_disable() +-#define printk_safe_exit_irq() local_irq_enable() +- +-static inline bool printk_percpu_data_ready(void) { return false; } +-#endif /* CONFIG_PRINTK */ +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -60,7 +61,6 @@ + #include "printk_ringbuffer.h" + #include "console_cmdline.h" + #include "braille.h" +-#include "internal.h" + + int console_printk[4] = { + CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ +@@ -227,19 +227,7 @@ static int nr_ext_console_drivers; + + static int __down_trylock_console_sem(unsigned long ip) + { +- int lock_failed; +- unsigned long flags; +- +- /* +- * Here and in __up_console_sem() we need to be in safe mode, +- * because spindump/WARN/etc from under console ->lock will +- * deadlock in printk()->down_trylock_console_sem() otherwise. 
+- */ +- printk_safe_enter_irqsave(flags); +- lock_failed = down_trylock(&console_sem); +- printk_safe_exit_irqrestore(flags); +- +- if (lock_failed) ++ if (down_trylock(&console_sem)) + return 1; + mutex_acquire(&console_lock_dep_map, 0, 1, ip); + return 0; +@@ -248,13 +236,9 @@ static int __down_trylock_console_sem(un + + static void __up_console_sem(unsigned long ip) + { +- unsigned long flags; +- + mutex_release(&console_lock_dep_map, ip); + +- printk_safe_enter_irqsave(flags); + up(&console_sem); +- printk_safe_exit_irqrestore(flags); + } + #define up_console_sem() __up_console_sem(_RET_IP_) + +@@ -426,7 +410,7 @@ static struct printk_ringbuffer *prb = & + */ + static bool __printk_percpu_data_ready __read_mostly; + +-bool printk_percpu_data_ready(void) ++static bool printk_percpu_data_ready(void) + { + return __printk_percpu_data_ready; + } +@@ -1060,7 +1044,6 @@ void __init setup_log_buf(int early) + struct printk_record r; + size_t new_descs_size; + size_t new_infos_size; +- unsigned long flags; + char *new_log_buf; + unsigned int free; + u64 seq; +@@ -1958,9 +1941,9 @@ static u16 printk_sprint(char *text, u16 + } + + __printf(4, 0) +-int vprintk_store(int facility, int level, +- const struct dev_printk_info *dev_info, +- const char *fmt, va_list args) ++static int vprintk_store(int facility, int level, ++ const struct dev_printk_info *dev_info, ++ const char *fmt, va_list args) + { + const u32 caller_id = printk_caller_id(); + struct prb_reserved_entry e; +@@ -2106,11 +2089,22 @@ asmlinkage int vprintk_emit(int facility + } + EXPORT_SYMBOL(vprintk_emit); + +-int vprintk_default(const char *fmt, va_list args) ++__printf(1, 0) ++static int vprintk_default(const char *fmt, va_list args) + { + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); + } +-EXPORT_SYMBOL_GPL(vprintk_default); ++ ++__printf(1, 0) ++static int vprintk_func(const char *fmt, va_list args) ++{ ++#ifdef CONFIG_KGDB_KDB ++ /* Allow to pass printk() to kdb but avoid a recursion. */ ++ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) ++ return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); ++#endif ++ return vprintk_default(fmt, args); ++} + + asmlinkage int vprintk(const char *fmt, va_list args) + { +@@ -3073,18 +3067,10 @@ void wake_up_klogd(void) + preempt_enable(); + } + +-void defer_console_output(void) ++__printf(1, 0) ++static int vprintk_deferred(const char *fmt, va_list args) + { +-} +- +-int vprintk_deferred(const char *fmt, va_list args) +-{ +- int r; +- +- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); +- defer_console_output(); +- +- return r; ++ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); + } + + int printk_deferred(const char *fmt, ...) +--- a/kernel/printk/printk_safe.c ++++ /dev/null +@@ -1,88 +0,0 @@ +-// SPDX-License-Identifier: GPL-2.0-or-later +-/* +- * printk_safe.c - Safe printk for printk-deadlock-prone contexts +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "internal.h" +- +-static DEFINE_PER_CPU(int, printk_context); +- +-#ifdef CONFIG_PRINTK_NMI +-void noinstr printk_nmi_enter(void) +-{ +- this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET); +-} +- +-void noinstr printk_nmi_exit(void) +-{ +- this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET); +-} +- +-/* +- * Marks a code that might produce many messages in NMI context +- * and the risk of losing them is more critical than eventual +- * reordering. 
+- */ +-void printk_nmi_direct_enter(void) +-{ +- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) +- this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); +-} +- +-void printk_nmi_direct_exit(void) +-{ +- this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); +-} +- +-#endif /* CONFIG_PRINTK_NMI */ +- +-/* Can be preempted by NMI. */ +-void __printk_safe_enter(void) +-{ +- this_cpu_inc(printk_context); +-} +- +-/* Can be preempted by NMI. */ +-void __printk_safe_exit(void) +-{ +- this_cpu_dec(printk_context); +-} +- +-__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +-{ +-#ifdef CONFIG_KGDB_KDB +- /* Allow to pass printk() to kdb but avoid a recursion. */ +- if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0)) +- return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args); +-#endif +- +- /* +- * Use the main logbuf even in NMI. But avoid calling console +- * drivers that might have their own locks. +- */ +- if (this_cpu_read(printk_context) & +- (PRINTK_NMI_DIRECT_CONTEXT_MASK | +- PRINTK_NMI_CONTEXT_MASK | +- PRINTK_SAFE_CONTEXT_MASK)) { +- int len; +- +- printk_safe_enter_irqsave(flags); +- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); +- printk_safe_exit_irqrestore(flags); +- defer_console_output(); +- return len; +- } +- +- /* No obstacles. */ +- return vprintk_default(fmt, args); +-} +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -9325,7 +9325,6 @@ void ftrace_dump(enum ftrace_dump_mode o + tracing_off(); + + local_irq_save(flags); +- printk_nmi_direct_enter(); + + /* Simulate the iterator */ + trace_init_global_iter(&iter); +@@ -9405,7 +9404,6 @@ void ftrace_dump(enum ftrace_dump_mode o + atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); + } + atomic_dec(&dump_running); +- printk_nmi_direct_exit(); + local_irq_restore(flags); + } + EXPORT_SYMBOL_GPL(ftrace_dump); diff --git a/kernel/patches-5.11.x-rt/0046-0027-printk-add-console-handover.patch b/kernel/patches-5.11.x-rt/0046-0027-printk-add-console-handover.patch new file mode 100644 index 000000000..b38cefd7e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0046-0027-printk-add-console-handover.patch @@ -0,0 +1,67 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:09 +0106 +Subject: [PATCH 27/28] printk: add console handover + +If earlyprintk is used, a boot console will print directly to the +console immediately. The boot console will unregister itself as soon +as a non-boot console registers. However, the non-boot console does +not begin printing until its kthread has started. Since this happens +much later, there is a long pause in the console output. If the +ringbuffer is small, messages could even be dropped during the +pause. + +Add a new CON_HANDOVER console flag to be used internally by printk +in order to track which non-boot console took over from a boot +console. If handover consoles have implemented write_atomic(), they +are allowed to print directly to the console until their kthread can +take over. 
+ +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/console.h | 1 + + kernel/printk/printk.c | 8 +++++++- + 2 files changed, 8 insertions(+), 1 deletion(-) + +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -137,6 +137,7 @@ static inline int con_debug_leave(void) + #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ + #define CON_BRL (32) /* Used for a braille device */ + #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ ++#define CON_HANDOVER (128) /* Device was previously a boot console. */ + + struct console { + char name[16]; +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1726,6 +1726,8 @@ static bool console_can_sync(struct cons + return false; + if (con->write_atomic && kernel_sync_mode()) + return true; ++ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) ++ return true; + if (con->write && (con->flags & CON_BOOT) && !con->thread) + return true; + return false; +@@ -1737,6 +1739,8 @@ static bool call_sync_console_driver(str + return false; + if (con->write_atomic && kernel_sync_mode()) + con->write_atomic(con, text, text_len); ++ else if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread) ++ con->write_atomic(con, text, text_len); + else if (con->write && (con->flags & CON_BOOT) && !con->thread) + con->write(con, text, text_len); + else +@@ -2829,8 +2833,10 @@ void register_console(struct console *ne + * the real console are the same physical device, it's annoying to + * see the beginning boot messages twice + */ +- if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) ++ if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) { + newcon->flags &= ~CON_PRINTBUFFER; ++ newcon->flags |= CON_HANDOVER; ++ } + + /* + * Put this console in the list - keep the diff --git a/kernel/patches-5.11.x-rt/0047-0028-printk-add-pr_flush.patch b/kernel/patches-5.11.x-rt/0047-0028-printk-add-pr_flush.patch new file mode 100644 index 000000000..3b9b73aa4 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0047-0028-printk-add-pr_flush.patch @@ -0,0 +1,198 @@ +From: John Ogness +Date: Mon, 30 Nov 2020 01:42:10 +0106 +Subject: [PATCH 28/28] printk: add pr_flush() + +Provide a function to allow waiting for console printers to catch +up to the latest logged message. + +Use pr_flush() to give console printers a chance to finish in +critical situations if no atomic console is available. For now +pr_flush() is only used in the most common error paths: +panic(), print_oops_end_marker(), report_bug(), kmsg_dump(). + +Signed-off-by: John Ogness +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/printk.h | 2 + + kernel/panic.c | 28 ++++++++++------- + kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ + lib/bug.c | 1 + 4 files changed, 99 insertions(+), 11 deletions(-) + +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -481,6 +481,8 @@ extern int kptr_restrict; + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) + #endif + ++bool pr_flush(int timeout_ms, bool reset_on_progress); ++ + /* + * ratelimited messages with local ratelimit_state, + * no local ratelimit_state used in the !PRINTK case +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -177,12 +177,28 @@ static void panic_print_sys_info(void) + void panic(const char *fmt, ...) 
+ { + static char buf[1024]; ++ va_list args2; + va_list args; + long i, i_next = 0, len; + int state = 0; + int old_cpu, this_cpu; + bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers; + ++ console_verbose(); ++ pr_emerg("Kernel panic - not syncing:\n"); ++ va_start(args2, fmt); ++ va_copy(args, args2); ++ vprintk(fmt, args2); ++ va_end(args2); ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ /* ++ * Avoid nested stack-dumping if a panic occurs during oops processing ++ */ ++ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) ++ dump_stack(); ++#endif ++ pr_flush(1000, true); ++ + /* + * Disable local interrupts. This will prevent panic_smp_self_stop + * from deadlocking the first cpu that invokes the panic, since +@@ -213,24 +229,13 @@ void panic(const char *fmt, ...) + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) + panic_smp_self_stop(); + +- console_verbose(); + bust_spinlocks(1); +- va_start(args, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + if (len && buf[len - 1] == '\n') + buf[len - 1] = '\0'; + +- pr_emerg("Kernel panic - not syncing: %s\n", buf); +-#ifdef CONFIG_DEBUG_BUGVERBOSE +- /* +- * Avoid nested stack-dumping if a panic occurs during oops processing +- */ +- if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) +- dump_stack(); +-#endif +- + /* + * If kgdb is enabled, give it a chance to run before we stop all + * the other CPUs or else we won't be able to debug processes left +@@ -552,6 +557,7 @@ static void print_oops_end_marker(void) + { + init_oops_id(); + pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id); ++ pr_flush(1000, true); + } + + /* +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -3228,6 +3228,12 @@ void kmsg_dump(enum kmsg_dump_reason rea + sync_mode = true; + pr_info("enabled sync mode\n"); + } ++ ++ /* ++ * Give the printing threads time to flush, allowing up to ++ * 1s of no printing forward progress before giving up. ++ */ ++ pr_flush(1000, true); + } + + rcu_read_lock(); +@@ -3507,3 +3513,76 @@ void console_atomic_unlock(unsigned int + prb_unlock(&printk_cpulock, flags); + } + EXPORT_SYMBOL(console_atomic_unlock); ++ ++static void pr_msleep(bool may_sleep, int ms) ++{ ++ if (may_sleep) { ++ msleep(ms); ++ } else { ++ while (ms--) ++ udelay(1000); ++ } ++} ++ ++/** ++ * pr_flush() - Wait for printing threads to catch up. ++ * ++ * @timeout_ms: The maximum time (in ms) to wait. ++ * @reset_on_progress: Reset the timeout if forward progress is seen. ++ * ++ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 ++ * represents infinite waiting. ++ * ++ * If @reset_on_progress is true, the timeout will be reset whenever any ++ * printer has been seen to make some forward progress. ++ * ++ * Context: Any context. ++ * Return: true if all enabled printers are caught up. 
++ */ ++bool pr_flush(int timeout_ms, bool reset_on_progress) ++{ ++ int remaining = timeout_ms; ++ struct console *con; ++ u64 last_diff = 0; ++ bool may_sleep; ++ u64 printk_seq; ++ u64 diff; ++ u64 seq; ++ ++ may_sleep = (preemptible() && !in_softirq()); ++ ++ seq = prb_next_seq(prb); ++ ++ for (;;) { ++ diff = 0; ++ ++ for_each_console(con) { ++ if (!(con->flags & CON_ENABLED)) ++ continue; ++ printk_seq = atomic64_read(&con->printk_seq); ++ if (printk_seq < seq) ++ diff += seq - printk_seq; ++ } ++ ++ if (diff != last_diff && reset_on_progress) ++ remaining = timeout_ms; ++ ++ if (!diff || remaining == 0) ++ break; ++ ++ if (remaining < 0) { ++ pr_msleep(may_sleep, 100); ++ } else if (remaining < 100) { ++ pr_msleep(may_sleep, remaining); ++ remaining = 0; ++ } else { ++ pr_msleep(may_sleep, 100); ++ remaining -= 100; ++ } ++ ++ last_diff = diff; ++ } ++ ++ return (diff == 0); ++} ++EXPORT_SYMBOL(pr_flush); +--- a/lib/bug.c ++++ b/lib/bug.c +@@ -205,6 +205,7 @@ enum bug_trap_type report_bug(unsigned l + else + pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n", + (void *)bugaddr); ++ pr_flush(1000, true); + + return BUG_TRAP_TYPE_BUG; + } diff --git a/kernel/patches-5.11.x-rt/0048-kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch b/kernel/patches-5.11.x-rt/0048-kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch new file mode 100644 index 000000000..bc3205d6e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0048-kcov-Remove-kcov-include-from-sched.h-and-move-it-to.patch @@ -0,0 +1,76 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 18 Feb 2021 18:31:26 +0100 +Subject: [PATCH] kcov: Remove kcov include from sched.h and move it to its + users. + +The recent addition of in_serving_softirq() to kcov.h results in +compile failure on PREEMPT_RT because it requires +task_struct::softirq_disable_cnt. This is not available if kcov.h is +included from sched.h. + +There is no need to include kcov.h from sched.h. All but the net/ users +already include the kcov header file. + +Move the include of the kcov.h header from sched.h to its users. +Additionally include sched.h from kcov.h to ensure that everything +task_struct related is available.
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/kcov.h | 1 + + include/linux/sched.h | 1 - + net/core/skbuff.c | 1 + + net/mac80211/iface.c | 1 + + net/mac80211/rx.c | 1 + + 5 files changed, 4 insertions(+), 1 deletion(-) + +--- a/include/linux/kcov.h ++++ b/include/linux/kcov.h +@@ -2,6 +2,7 @@ + #ifndef _LINUX_KCOV_H + #define _LINUX_KCOV_H + ++#include + #include + + struct task_struct; +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -14,7 +14,6 @@ + #include + #include + #include +-#include + #include + #include + #include +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -60,6 +60,7 @@ + #include + #include + #include ++#include + + #include + #include +--- a/net/mac80211/iface.c ++++ b/net/mac80211/iface.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include "ieee80211_i.h" +--- a/net/mac80211/rx.c ++++ b/net/mac80211/rx.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/kernel/patches-5.11.x-rt/0049-cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch b/kernel/patches-5.11.x-rt/0049-cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch new file mode 100644 index 000000000..adec36003 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0049-cgroup-use-irqsave-in-cgroup_rstat_flush_locked.patch @@ -0,0 +1,43 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 3 Jul 2018 18:19:48 +0200 +Subject: [PATCH] cgroup: use irqsave in cgroup_rstat_flush_locked() + +All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock +either with spin_lock_irq() or spin_lock_irqsave(). +cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which +is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated() +in IRQ context and therefore requires _irqsave() locking suffix in +cgroup_rstat_flush_locked(). +Since there is no difference between spin_lock_t and raw_spin_lock_t +on !RT lockdep does not complain here. On RT lockdep complains because +the interrupts were not disabled here and a deadlock is possible. + +Acquire the raw_spin_lock_t with disabled interrupts. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/cgroup/rstat.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/kernel/cgroup/rstat.c ++++ b/kernel/cgroup/rstat.c +@@ -149,8 +149,9 @@ static void cgroup_rstat_flush_locked(st + raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, + cpu); + struct cgroup *pos = NULL; ++ unsigned long flags; + +- raw_spin_lock(cpu_lock); ++ raw_spin_lock_irqsave(cpu_lock, flags); + while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { + struct cgroup_subsys_state *css; + +@@ -162,7 +163,7 @@ static void cgroup_rstat_flush_locked(st + css->ss->css_rstat_flush(css, cpu); + rcu_read_unlock(); + } +- raw_spin_unlock(cpu_lock); ++ raw_spin_unlock_irqrestore(cpu_lock, flags); + + /* if @may_sleep, play nice and yield if necessary */ + if (may_sleep && (need_resched() || diff --git a/kernel/patches-5.4.x-rt/0082-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch b/kernel/patches-5.11.x-rt/0050-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch similarity index 92% rename from kernel/patches-5.4.x-rt/0082-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch rename to kernel/patches-5.11.x-rt/0050-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch index 88fc1020f..1882cd91c 100644 --- a/kernel/patches-5.4.x-rt/0082-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch +++ b/kernel/patches-5.11.x-rt/0050-mm-workingset-replace-IRQ-off-check-with-a-lockdep-a.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/workingset.c +++ b/mm/workingset.c -@@ -367,6 +367,8 @@ static struct list_lru shadow_nodes; +@@ -430,6 +430,8 @@ static struct list_lru shadow_nodes; void workingset_update_node(struct xa_node *node) { @@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Track non-empty nodes that contain only shadow entries; * unlink those that contain pages or are being freed. -@@ -375,7 +377,8 @@ void workingset_update_node(struct xa_no +@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_no * already where they should be. The list_empty() test is safe * as node->private_list is protected by the i_pages lock. */ diff --git a/kernel/patches-5.11.x-rt/0051-shmem-Use-raw_spinlock_t-for-stat_lock.patch b/kernel/patches-5.11.x-rt/0051-shmem-Use-raw_spinlock_t-for-stat_lock.patch new file mode 100644 index 000000000..ff83d2337 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0051-shmem-Use-raw_spinlock_t-for-stat_lock.patch @@ -0,0 +1,138 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 14 Aug 2020 18:53:34 +0200 +Subject: [PATCH] shmem: Use raw_spinlock_t for ->stat_lock + +Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is +per-CPU. Access here is serialized by disabling preemption. If the pool is +empty, it gets reloaded from `->next_ino'. Access here is serialized by +->stat_lock which is a spinlock_t and cannot be acquired with disabled +preemption. +One way around it would be to make the per-CPU ino_batch struct containing +the inode number a local_lock_t. +Another solution is to promote ->stat_lock to a raw_spinlock_t. The critical +sections are short. The mpol_put() should be moved outside of the critical +section to avoid invoking the destructor with disabled preemption.
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/shmem_fs.h | 2 +- + mm/shmem.c | 31 +++++++++++++++++-------------- + 2 files changed, 18 insertions(+), 15 deletions(-) + +--- a/include/linux/shmem_fs.h ++++ b/include/linux/shmem_fs.h +@@ -31,7 +31,7 @@ struct shmem_sb_info { + struct percpu_counter used_blocks; /* How many are allocated */ + unsigned long max_inodes; /* How many inodes are allowed */ + unsigned long free_inodes; /* How many are left for allocation */ +- spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ ++ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */ + umode_t mode; /* Mount mode for root directory */ + unsigned char huge; /* Whether to try for hugepages */ + kuid_t uid; /* Mount uid for root directory */ +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct su + ino_t ino; + + if (!(sb->s_flags & SB_KERNMOUNT)) { +- spin_lock(&sbinfo->stat_lock); ++ raw_spin_lock(&sbinfo->stat_lock); + if (sbinfo->max_inodes) { + if (!sbinfo->free_inodes) { +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + return -ENOSPC; + } + sbinfo->free_inodes--; +@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct su + } + *inop = ino; + } +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + } else if (inop) { + /* + * __shmem_file_setup, one of our callers, is lock-free: it +@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct su + * to worry about things like glibc compatibility. + */ + ino_t *next_ino; ++ + next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu()); + ino = *next_ino; + if (unlikely(ino % SHMEM_INO_BATCH == 0)) { +- spin_lock(&sbinfo->stat_lock); ++ raw_spin_lock(&sbinfo->stat_lock); + ino = sbinfo->next_ino; + sbinfo->next_ino += SHMEM_INO_BATCH; +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + if (unlikely(is_zero_ino(ino))) + ino++; + } +@@ -341,9 +342,9 @@ static void shmem_free_inode(struct supe + { + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); ++ raw_spin_lock(&sbinfo->stat_lock); + sbinfo->free_inodes++; +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + } + } + +@@ -1479,10 +1480,10 @@ static struct mempolicy *shmem_get_sbmpo + { + struct mempolicy *mpol = NULL; + if (sbinfo->mpol) { +- spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ ++ raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */ + mpol = sbinfo->mpol; + mpol_get(mpol); +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + } + return mpol; + } +@@ -3587,9 +3588,10 @@ static int shmem_reconfigure(struct fs_c + struct shmem_options *ctx = fc->fs_private; + struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); + unsigned long inodes; ++ struct mempolicy *mpol = NULL; + const char *err; + +- spin_lock(&sbinfo->stat_lock); ++ raw_spin_lock(&sbinfo->stat_lock); + inodes = sbinfo->max_inodes - sbinfo->free_inodes; + if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { + if (!sbinfo->max_blocks) { +@@ -3634,14 +3636,15 @@ static int shmem_reconfigure(struct fs_c + * Preserve previous mempolicy unless mpol remount option was specified. 
+ */ + if (ctx->mpol) { +- mpol_put(sbinfo->mpol); ++ mpol = sbinfo->mpol; + sbinfo->mpol = ctx->mpol; /* transfers initial ref */ + ctx->mpol = NULL; + } +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); ++ mpol_put(mpol); + return 0; + out: +- spin_unlock(&sbinfo->stat_lock); ++ raw_spin_unlock(&sbinfo->stat_lock); + return invalfc(fc, "%s", err); + } + +@@ -3758,7 +3761,7 @@ static int shmem_fill_super(struct super + sbinfo->mpol = ctx->mpol; + ctx->mpol = NULL; + +- spin_lock_init(&sbinfo->stat_lock); ++ raw_spin_lock_init(&sbinfo->stat_lock); + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) + goto failed; + spin_lock_init(&sbinfo->shrinklist_lock); diff --git a/kernel/patches-5.11.x-rt/0052-net--Move-lockdep-where-it-belongs.patch b/kernel/patches-5.11.x-rt/0052-net--Move-lockdep-where-it-belongs.patch new file mode 100644 index 000000000..6b8ea3838 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0052-net--Move-lockdep-where-it-belongs.patch @@ -0,0 +1,40 @@ +Subject: net: Move lockdep where it belongs +From: Thomas Gleixner +Date: Tue, 08 Sep 2020 07:32:20 +0200 + +Signed-off-by: Thomas Gleixner +--- + net/core/sock.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -3050,12 +3050,11 @@ void lock_sock_nested(struct sock *sk, i + if (sk->sk_lock.owned) + __lock_sock(sk); + sk->sk_lock.owned = 1; +- spin_unlock(&sk->sk_lock.slock); ++ spin_unlock_bh(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); +- local_bh_enable(); + } + EXPORT_SYMBOL(lock_sock_nested); + +@@ -3104,13 +3103,12 @@ bool lock_sock_fast(struct sock *sk) __a + + __lock_sock(sk); + sk->sk_lock.owned = 1; +- spin_unlock(&sk->sk_lock.slock); ++ spin_unlock_bh(&sk->sk_lock.slock); + /* + * The sk_lock has mutex_lock() semantics here: + */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + __acquire(&sk->sk_lock.slock); +- local_bh_enable(); + return true; + } + EXPORT_SYMBOL(lock_sock_fast); diff --git a/kernel/patches-5.11.x-rt/0053-tcp-Remove-superfluous-BH-disable-around-listening_h.patch b/kernel/patches-5.11.x-rt/0053-tcp-Remove-superfluous-BH-disable-around-listening_h.patch new file mode 100644 index 000000000..120202dfd --- /dev/null +++ b/kernel/patches-5.11.x-rt/0053-tcp-Remove-superfluous-BH-disable-around-listening_h.patch @@ -0,0 +1,99 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 12 Oct 2020 17:33:54 +0200 +Subject: [PATCH] tcp: Remove superfluous BH-disable around listening_hash + +Commit + 9652dc2eb9e40 ("tcp: relax listening_hash operations") + +removed the need to disable bottom half while acquiring +listening_hash.lock. There are still two callers left which disable +bottom half before the lock is acquired. + +Drop local_bh_disable() around __inet_hash() which acquires +listening_hash->lock, invoke inet_ehash_nolisten() with disabled BH. +inet_unhash() conditionally acquires listening_hash->lock. 
+ +Reported-by: Mike Galbraith +Signed-off-by: Sebastian Andrzej Siewior +Link: https://lore.kernel.org/linux-rt-users/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de/ +Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net +--- + net/ipv4/inet_hashtables.c | 19 ++++++++++++------- + net/ipv6/inet6_hashtables.c | 5 +---- + 2 files changed, 13 insertions(+), 11 deletions(-) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -635,7 +635,9 @@ int __inet_hash(struct sock *sk, struct + int err = 0; + + if (sk->sk_state != TCP_LISTEN) { ++ local_bh_disable(); + inet_ehash_nolisten(sk, osk, NULL); ++ local_bh_enable(); + return 0; + } + WARN_ON(!sk_unhashed(sk)); +@@ -667,11 +669,8 @@ int inet_hash(struct sock *sk) + { + int err = 0; + +- if (sk->sk_state != TCP_CLOSE) { +- local_bh_disable(); ++ if (sk->sk_state != TCP_CLOSE) + err = __inet_hash(sk, NULL); +- local_bh_enable(); +- } + + return err; + } +@@ -682,17 +681,20 @@ void inet_unhash(struct sock *sk) + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_listen_hashbucket *ilb = NULL; + spinlock_t *lock; ++ bool state_listen; + + if (sk_unhashed(sk)) + return; + + if (sk->sk_state == TCP_LISTEN) { ++ state_listen = true; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; +- lock = &ilb->lock; ++ spin_lock(&ilb->lock); + } else { ++ state_listen = false; + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); ++ spin_lock_bh(lock); + } +- spin_lock_bh(lock); + if (sk_unhashed(sk)) + goto unlock; + +@@ -705,7 +707,10 @@ void inet_unhash(struct sock *sk) + __sk_nulls_del_node_init_rcu(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + unlock: +- spin_unlock_bh(lock); ++ if (state_listen) ++ spin_unlock(&ilb->lock); ++ else ++ spin_unlock_bh(lock); + } + EXPORT_SYMBOL_GPL(inet_unhash); + +--- a/net/ipv6/inet6_hashtables.c ++++ b/net/ipv6/inet6_hashtables.c +@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk) + { + int err = 0; + +- if (sk->sk_state != TCP_CLOSE) { +- local_bh_disable(); ++ if (sk->sk_state != TCP_CLOSE) + err = __inet_hash(sk, NULL); +- local_bh_enable(); +- } + + return err; + } diff --git a/kernel/patches-5.11.x-rt/0054-smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch b/kernel/patches-5.11.x-rt/0054-smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch new file mode 100644 index 000000000..93dae9aa7 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0054-smp-Wake-ksoftirqd-on-PREEMPT_RT-instead-do_softirq.patch @@ -0,0 +1,40 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 15 Feb 2021 18:44:12 +0100 +Subject: [PATCH] smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq(). + +The softirq implementation on PREEMPT_RT does not provide do_softirq(). +The other user of do_softirq() is replaced with a local_bh_disable() ++ enable() around the possible raise-softirq invocation. This can not be +done here because migration_cpu_stop() is invoked with disabled +preemption. + +Wake the softirq thread on PREEMPT_RT if there are any pending softirqs. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/smp.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +--- a/kernel/smp.c ++++ b/kernel/smp.c +@@ -450,8 +450,18 @@ void flush_smp_call_function_from_idle(v + + local_irq_save(flags); + flush_smp_call_function_queue(true); +- if (local_softirq_pending()) +- do_softirq(); ++ ++ if (local_softirq_pending()) { ++ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ do_softirq(); ++ } else { ++ struct task_struct *ksoftirqd = this_cpu_ksoftirqd(); ++ ++ if (ksoftirqd && ksoftirqd->state != TASK_RUNNING) ++ wake_up_process(ksoftirqd); ++ } ++ } + + local_irq_restore(flags); + } diff --git a/kernel/patches-5.11.x-rt/0055-0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch b/kernel/patches-5.11.x-rt/0055-0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch new file mode 100644 index 000000000..49163ca14 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0055-0001-tasklets-Replace-barrier-with-cpu_relax-in-tasklet_u.patch @@ -0,0 +1,28 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:04 +0100 +Subject: [PATCH 01/20] tasklets: Replace barrier() with cpu_relax() in + tasklet_unlock_wait() + +A barrier() in a tight loop which waits for something to happen on a remote +CPU is a pointless exercise. Replace it with cpu_relax() which allows HT +siblings to make progress. + +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -677,7 +677,8 @@ static inline void tasklet_unlock(struct + + static inline void tasklet_unlock_wait(struct tasklet_struct *t) + { +- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } ++ while (test_bit(TASKLET_STATE_RUN, &t->state)) ++ cpu_relax(); + } + #else + #define tasklet_trylock(t) 1 diff --git a/kernel/patches-5.11.x-rt/0056-0002-tasklets-Use-static-inlines-for-stub-implementations.patch b/kernel/patches-5.11.x-rt/0056-0002-tasklets-Use-static-inlines-for-stub-implementations.patch new file mode 100644 index 000000000..cb5602e14 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0056-0002-tasklets-Use-static-inlines-for-stub-implementations.patch @@ -0,0 +1,28 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:05 +0100 +Subject: [PATCH 02/20] tasklets: Use static inlines for stub implementations + +Inlines exist for a reason. 
+ +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -681,9 +681,9 @@ static inline void tasklet_unlock_wait(s + cpu_relax(); + } + #else +-#define tasklet_trylock(t) 1 +-#define tasklet_unlock_wait(t) do { } while (0) +-#define tasklet_unlock(t) do { } while (0) ++static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } ++static inline void tasklet_unlock(struct tasklet_struct *t) { } ++static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } + #endif + + extern void __tasklet_schedule(struct tasklet_struct *t); diff --git a/kernel/patches-5.11.x-rt/0057-0003-tasklets-Provide-tasklet_disable_in_atomic.patch b/kernel/patches-5.11.x-rt/0057-0003-tasklets-Provide-tasklet_disable_in_atomic.patch new file mode 100644 index 000000000..dea76c2ae --- /dev/null +++ b/kernel/patches-5.11.x-rt/0057-0003-tasklets-Provide-tasklet_disable_in_atomic.patch @@ -0,0 +1,61 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:06 +0100 +Subject: [PATCH 03/20] tasklets: Provide tasklet_disable_in_atomic() + +Replacing the spin wait loops in tasklet_unlock_wait() with +wait_var_event() is not possible as a handful of tasklet_disable() +invocations are happening in atomic context. All other invocations are in +teardown paths which can sleep. + +Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to +convert the few atomic use cases over, which allows to change +tasklet_disable() and tasklet_unlock_wait() in a later step. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -680,10 +680,21 @@ static inline void tasklet_unlock_wait(s + while (test_bit(TASKLET_STATE_RUN, &t->state)) + cpu_relax(); + } ++ ++/* ++ * Do not use in new code. Waiting for tasklets from atomic contexts is ++ * error prone and should be avoided. ++ */ ++static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) ++{ ++ while (test_bit(TASKLET_STATE_RUN, &t->state)) ++ cpu_relax(); ++} + #else + static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } + static inline void tasklet_unlock(struct tasklet_struct *t) { } + static inline void tasklet_unlock_wait(struct tasklet_struct *t) { } ++static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { } + #endif + + extern void __tasklet_schedule(struct tasklet_struct *t); +@@ -708,6 +719,17 @@ static inline void tasklet_disable_nosyn + smp_mb__after_atomic(); + } + ++/* ++ * Do not use in new code. Disabling tasklets from atomic contexts is ++ * error prone and should be avoided. 
++ */ ++static inline void tasklet_disable_in_atomic(struct tasklet_struct *t) ++{ ++ tasklet_disable_nosync(t); ++ tasklet_unlock_spin_wait(t); ++ smp_mb(); ++} ++ + static inline void tasklet_disable(struct tasklet_struct *t) + { + tasklet_disable_nosync(t); diff --git a/kernel/patches-5.11.x-rt/0058-0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch b/kernel/patches-5.11.x-rt/0058-0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch new file mode 100644 index 000000000..6f8ad778b --- /dev/null +++ b/kernel/patches-5.11.x-rt/0058-0004-tasklets-Use-spin-wait-in-tasklet_disable-temporaril.patch @@ -0,0 +1,26 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:07 +0100 +Subject: [PATCH 04/20] tasklets: Use spin wait in tasklet_disable() + temporarily + +To ease the transition use spin waiting in tasklet_disable() until all +usage sites from atomic context have been cleaned up. + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -733,7 +733,8 @@ static inline void tasklet_disable_in_at + static inline void tasklet_disable(struct tasklet_struct *t) + { + tasklet_disable_nosync(t); +- tasklet_unlock_wait(t); ++ /* Spin wait until all atomic users are converted */ ++ tasklet_unlock_spin_wait(t); + smp_mb(); + } + diff --git a/kernel/patches-5.11.x-rt/0059-0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch b/kernel/patches-5.11.x-rt/0059-0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch new file mode 100644 index 000000000..0fe21db31 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0059-0005-tasklets-Replace-spin-wait-in-tasklet_unlock_wait.patch @@ -0,0 +1,81 @@ +From: Peter Zijlstra +Date: Tue, 9 Mar 2021 09:42:08 +0100 +Subject: [PATCH 05/20] tasklets: Replace spin wait in tasklet_unlock_wait() + +tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This +is wasting CPU cycles in a tight loop which is especially painful in a +guest when the CPU running the tasklet is scheduled out. + +tasklet_unlock_wait() is invoked from tasklet_kill() which is used in +teardown paths and not performance critical at all. Replace the spin wait +with wait_var_event(). + +There are no users of tasklet_unlock_wait() which are invoked from atomic +contexts. The usage in tasklet_disable() has been replaced temporarily with +the spin waiting variant until the atomic users are fixed up and will be +converted to the sleep wait variant later. + +Signed-off-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 13 ++----------- + kernel/softirq.c | 18 ++++++++++++++++++ + 2 files changed, 20 insertions(+), 11 deletions(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -669,17 +669,8 @@ static inline int tasklet_trylock(struct + return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); + } + +-static inline void tasklet_unlock(struct tasklet_struct *t) +-{ +- smp_mb__before_atomic(); +- clear_bit(TASKLET_STATE_RUN, &(t)->state); +-} +- +-static inline void tasklet_unlock_wait(struct tasklet_struct *t) +-{ +- while (test_bit(TASKLET_STATE_RUN, &t->state)) +- cpu_relax(); +-} ++void tasklet_unlock(struct tasklet_struct *t); ++void tasklet_unlock_wait(struct tasklet_struct *t); + + /* + * Do not use in new code. 
Waiting for tasklets from atomic contexts is +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #define CREATE_TRACE_POINTS + #include +@@ -619,6 +620,23 @@ void tasklet_kill(struct tasklet_struct + } + EXPORT_SYMBOL(tasklet_kill); + ++#ifdef CONFIG_SMP ++void tasklet_unlock(struct tasklet_struct *t) ++{ ++ smp_mb__before_atomic(); ++ clear_bit(TASKLET_STATE_RUN, &t->state); ++ smp_mb__after_atomic(); ++ wake_up_var(&t->state); ++} ++EXPORT_SYMBOL_GPL(tasklet_unlock); ++ ++void tasklet_unlock_wait(struct tasklet_struct *t) ++{ ++ wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state)); ++} ++EXPORT_SYMBOL_GPL(tasklet_unlock_wait); ++#endif ++ + void __init softirq_init(void) + { + int cpu; diff --git a/kernel/patches-5.11.x-rt/0060-0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch b/kernel/patches-5.11.x-rt/0060-0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch new file mode 100644 index 000000000..16ebb0465 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0060-0006-tasklets-Replace-spin-wait-in-tasklet_kill.patch @@ -0,0 +1,67 @@ +From: Peter Zijlstra +Date: Tue, 9 Mar 2021 09:42:09 +0100 +Subject: [PATCH 06/20] tasklets: Replace spin wait in tasklet_kill() + +tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking +yield() from inside the loop. yield() is an ill defined mechanism and the +result might still be wasting CPU cycles in a tight loop which is +especially painful in a guest when the CPU running the tasklet is scheduled +out. + +tasklet_kill() is used in teardown paths and not performance critical at +all. Replace the spin wait with wait_var_event(). + +Signed-off-by: Peter Zijlstra +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/softirq.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -530,6 +530,16 @@ void __tasklet_hi_schedule(struct taskle + } + EXPORT_SYMBOL(__tasklet_hi_schedule); + ++static inline bool tasklet_clear_sched(struct tasklet_struct *t) ++{ ++ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) { ++ wake_up_var(&t->state); ++ return true; ++ } ++ ++ return false; ++} ++ + static void tasklet_action_common(struct softirq_action *a, + struct tasklet_head *tl_head, + unsigned int softirq_nr) +@@ -549,8 +559,7 @@ static void tasklet_action_common(struct + + if (tasklet_trylock(t)) { + if (!atomic_read(&t->count)) { +- if (!test_and_clear_bit(TASKLET_STATE_SCHED, +- &t->state)) ++ if (!tasklet_clear_sched(t)) + BUG(); + if (t->use_callback) + t->callback(t); +@@ -610,13 +619,11 @@ void tasklet_kill(struct tasklet_struct + if (in_interrupt()) + pr_notice("Attempt to kill tasklet from interrupt\n"); + +- while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { +- do { +- yield(); +- } while (test_bit(TASKLET_STATE_SCHED, &t->state)); +- } ++ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state)); ++ + tasklet_unlock_wait(t); +- clear_bit(TASKLET_STATE_SCHED, &t->state); ++ tasklet_clear_sched(t); + } + EXPORT_SYMBOL(tasklet_kill); + diff --git a/kernel/patches-5.11.x-rt/0061-0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch b/kernel/patches-5.11.x-rt/0061-0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch new file mode 100644 index 000000000..0670a9ff5 --- /dev/null +++ 
b/kernel/patches-5.11.x-rt/0061-0007-tasklets-Prevent-tasklet_unlock_spin_wait-deadlock-o.patch @@ -0,0 +1,100 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:10 +0100 +Subject: [PATCH 07/20] tasklets: Prevent tasklet_unlock_spin_wait() deadlock + on RT + +tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_SCHED bit in +the tasklet state to be cleared. This works on !RT nicely because the +corresponding execution can only happen on a different CPU. + +On RT softirq processing is preemptible, therefore a task preempting the +softirq processing thread can spin forever. + +Prevent this by invoking local_bh_disable()/enable() inside the loop. In +case that the softirq processing thread was preempted by the current task, +current will block on the local lock which yields the CPU to the preempted +softirq processing thread. If the tasklet is processed on a different CPU +then the local_bh_disable()/enable() pair is just a waste of processor +cycles. + +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/interrupt.h | 12 ++---------- + kernel/softirq.c | 28 +++++++++++++++++++++++++++- + 2 files changed, 29 insertions(+), 11 deletions(-) + +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -663,7 +663,7 @@ enum + TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ + }; + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) + static inline int tasklet_trylock(struct tasklet_struct *t) + { + return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); +@@ -671,16 +671,8 @@ static inline int tasklet_trylock(struct + + void tasklet_unlock(struct tasklet_struct *t); + void tasklet_unlock_wait(struct tasklet_struct *t); ++void tasklet_unlock_spin_wait(struct tasklet_struct *t); + +-/* +- * Do not use in new code. Waiting for tasklets from atomic contexts is +- * error prone and should be avoided. +- */ +-static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) +-{ +- while (test_bit(TASKLET_STATE_RUN, &t->state)) +- cpu_relax(); +-} + #else + static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; } + static inline void tasklet_unlock(struct tasklet_struct *t) { } +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -614,6 +614,32 @@ void tasklet_init(struct tasklet_struct + } + EXPORT_SYMBOL(tasklet_init); + ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) ++/* ++ * Do not use in new code. Waiting for tasklets from atomic contexts is ++ * error prone and should be avoided. ++ */ ++void tasklet_unlock_spin_wait(struct tasklet_struct *t) ++{ ++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ /* ++ * Prevent a live lock when current preempted soft ++ * interrupt processing or prevents ksoftirqd from ++ * running. If the tasklet runs on a different CPU ++ * then this has no effect other than doing the BH ++ * disable/enable dance for nothing. 
++ local_bh_disable(); ++ local_bh_enable(); ++ } else { ++ cpu_relax(); ++ } ++ } ++} ++EXPORT_SYMBOL(tasklet_unlock_spin_wait); ++#endif ++ + void tasklet_kill(struct tasklet_struct *t) + { + if (in_interrupt()) +@@ -627,7 +653,7 @@ void tasklet_kill(struct tasklet_struct + } + EXPORT_SYMBOL(tasklet_kill); + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) + void tasklet_unlock(struct tasklet_struct *t) + { + smp_mb__before_atomic(); diff --git a/kernel/patches-5.11.x-rt/0062-0008-net-jme-Replace-link-change-tasklet-with-work.patch b/kernel/patches-5.11.x-rt/0062-0008-net-jme-Replace-link-change-tasklet-with-work.patch new file mode 100644 index 000000000..d0c08d551 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0062-0008-net-jme-Replace-link-change-tasklet-with-work.patch @@ -0,0 +1,79 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 9 Mar 2021 09:42:11 +0100 +Subject: [PATCH 08/20] net: jme: Replace link-change tasklet with work + +The link change tasklet disables the tasklets for tx/rx processing while +updating hw parameters and then enables the tasklets again. + +This update can also be pushed into a workqueue where it can be performed +in preemptible context. This allows tasklet_disable() to become sleepable. + +Replace the linkch_task tasklet with a work. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/jme.c | 10 +++++----- + drivers/net/ethernet/jme.h | 2 +- + 2 files changed, 6 insertions(+), 6 deletions(-) + +--- a/drivers/net/ethernet/jme.c ++++ b/drivers/net/ethernet/jme.c +@@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapt + jwrite32f(jme, JME_APMC, apmc); + } + +-static void jme_link_change_tasklet(struct tasklet_struct *t) ++static void jme_link_change_work(struct work_struct *work) + { +- struct jme_adapter *jme = from_tasklet(jme, t, linkch_task); ++ struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task); + struct net_device *netdev = jme->dev; + int rc; + +@@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u3 + * all other events are ignored + */ + jwrite32(jme, JME_IEVE, intrstat); +- tasklet_schedule(&jme->linkch_task); ++ schedule_work(&jme->linkch_task); + goto out_reenable; + } + +@@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev) + jme_clear_pm_disable_wol(jme); + JME_NAPI_ENABLE(jme); + +- tasklet_setup(&jme->linkch_task, jme_link_change_tasklet); + tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet); + tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet); + tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet); +@@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev) + + JME_NAPI_DISABLE(jme); + +- tasklet_kill(&jme->linkch_task); ++ cancel_work_sync(&jme->linkch_task); + tasklet_kill(&jme->txclean_task); + tasklet_kill(&jme->rxclean_task); + tasklet_kill(&jme->rxempty_task); +@@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev, + atomic_set(&jme->rx_empty, 1); + + tasklet_setup(&jme->pcc_task, jme_pcc_tasklet); ++ INIT_WORK(&jme->linkch_task, jme_link_change_work); + jme->dpi.cur = PCC_P1; + + jme->reg_ghc = 0; +--- a/drivers/net/ethernet/jme.h ++++ b/drivers/net/ethernet/jme.h +@@ -411,7 +411,7 @@ struct jme_adapter { + struct tasklet_struct rxempty_task; + struct tasklet_struct rxclean_task; + struct tasklet_struct txclean_task; +- struct tasklet_struct linkch_task; ++ struct work_struct linkch_task; + struct tasklet_struct pcc_task; + unsigned long flags; + u32
reg_txcs;
diff --git a/kernel/patches-5.11.x-rt/0063-0009-net-sundance-Use-tasklet_disable_in_atomic.patch b/kernel/patches-5.11.x-rt/0063-0009-net-sundance-Use-tasklet_disable_in_atomic.patch
new file mode 100644
index 000000000..2dd71370a
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0063-0009-net-sundance-Use-tasklet_disable_in_atomic.patch
@@ -0,0 +1,32 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 9 Mar 2021 09:42:12 +0100
+Subject: [PATCH 09/20] net: sundance: Use tasklet_disable_in_atomic().
+
+tasklet_disable() is used in the timer callback. This might be
+disentangled, but without access to the hardware that's a bit risky.
+
+Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be
+changed to a sleep wait once all remaining atomic users are converted.
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Thomas Gleixner
+Cc: Denis Kirjanov
+Cc: "David S. Miller"
+Cc: Jakub Kicinski
+Cc: netdev@vger.kernel.org
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/net/ethernet/dlink/sundance.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/dlink/sundance.c
++++ b/drivers/net/ethernet/dlink/sundance.c
+@@ -963,7 +963,7 @@ static void tx_timeout(struct net_device
+ 	unsigned long flag;
+ 
+ 	netif_stop_queue(dev);
+-	tasklet_disable(&np->tx_tasklet);
++	tasklet_disable_in_atomic(&np->tx_tasklet);
+ 	iowrite16(0, ioaddr + IntrEnable);
+ 	printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x "
+ 		   "TxFrameId %2.2x,"
diff --git a/kernel/patches-5.11.x-rt/0064-0010-ath9k-Use-tasklet_disable_in_atomic.patch b/kernel/patches-5.11.x-rt/0064-0010-ath9k-Use-tasklet_disable_in_atomic.patch
new file mode 100644
index 000000000..791c1a724
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0064-0010-ath9k-Use-tasklet_disable_in_atomic.patch
@@ -0,0 +1,41 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 9 Mar 2021 09:42:13 +0100
+Subject: [PATCH 10/20] ath9k: Use tasklet_disable_in_atomic()
+
+All callers of ath9k_beacon_ensure_primary_slot() are preemptible /
+acquire a mutex except for this callchain:
+
+  spin_lock_bh(&sc->sc_pcu_lock);
+  ath_complete_reset()
+  -> ath9k_calculate_summary_state()
+     -> ath9k_beacon_ensure_primary_slot()
+
+It's unclear how that can be disentangled, so use
+tasklet_disable_in_atomic() for now. This allows tasklet_disable() to
+become sleepable once the remaining atomic users are cleaned up.
+
+Signed-off-by: Sebastian Andrzej Siewior
+Signed-off-by: Thomas Gleixner
+Cc: ath9k-devel@qca.qualcomm.com
+Cc: Kalle Valo
+Cc: "David S. Miller"
+Cc: Jakub Kicinski
+Cc: linux-wireless@vger.kernel.org
+Cc: netdev@vger.kernel.org
+Acked-by: Kalle Valo
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/net/wireless/ath/ath9k/beacon.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/wireless/ath/ath9k/beacon.c
++++ b/drivers/net/wireless/ath/ath9k/beacon.c
+@@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(st
+ 	int first_slot = ATH_BCBUF;
+ 	int slot;
+ 
+-	tasklet_disable(&sc->bcon_tasklet);
++	tasklet_disable_in_atomic(&sc->bcon_tasklet);
+ 
+ 	/* Find first taken slot.
*/ + for (slot = 0; slot < ATH_BCBUF; slot++) { diff --git a/kernel/patches-5.11.x-rt/0065-0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch b/kernel/patches-5.11.x-rt/0065-0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch new file mode 100644 index 000000000..ba0555f8d --- /dev/null +++ b/kernel/patches-5.11.x-rt/0065-0011-atm-eni-Use-tasklet_disable_in_atomic-in-the-send-ca.patch @@ -0,0 +1,35 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 9 Mar 2021 09:42:14 +0100 +Subject: [PATCH 11/20] atm: eni: Use tasklet_disable_in_atomic() in the send() + callback + +The atmdev_ops::send callback which calls tasklet_disable() is invoked with +bottom halfs disabled from net_device_ops::ndo_start_xmit(). All other +invocations of tasklet_disable() in this driver happen in preemptible +context. + +Change the send() call to use tasklet_disable_in_atomic() which allows +tasklet_disable() to be made sleepable once the remaining atomic context +usage sites are cleaned up. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Cc: Chas Williams <3chas3@gmail.com> +Cc: linux-atm-general@lists.sourceforge.net +Cc: netdev@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/atm/eni.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/atm/eni.c ++++ b/drivers/atm/eni.c +@@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc, + } + submitted++; + ATM_SKB(skb)->vcc = vcc; +- tasklet_disable(&ENI_DEV(vcc->dev)->task); ++ tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task); + res = do_tx(skb); + tasklet_enable(&ENI_DEV(vcc->dev)->task); + if (res == enq_ok) return 0; diff --git a/kernel/patches-5.11.x-rt/0066-0012-PCI-hv-Use-tasklet_disable_in_atomic.patch b/kernel/patches-5.11.x-rt/0066-0012-PCI-hv-Use-tasklet_disable_in_atomic.patch new file mode 100644 index 000000000..99365a298 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0066-0012-PCI-hv-Use-tasklet_disable_in_atomic.patch @@ -0,0 +1,39 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 9 Mar 2021 09:42:15 +0100 +Subject: [PATCH 12/20] PCI: hv: Use tasklet_disable_in_atomic() + +The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is +invoked via irq_chip_compose_msi_msg(), which itself is always invoked from +atomic contexts from the guts of the interrupt core code. + +There is no way to change this w/o rewriting the whole driver, so use +tasklet_disable_in_atomic() which allows to make tasklet_disable() +sleepable once the remaining atomic users are addressed. + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Cc: "K. Y. Srinivasan" +Cc: Haiyang Zhang +Cc: Stephen Hemminger +Cc: Wei Liu +Cc: Lorenzo Pieralisi +Cc: Rob Herring +Cc: Bjorn Helgaas +Cc: linux-hyperv@vger.kernel.org +Cc: linux-pci@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/pci/controller/pci-hyperv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/pci/controller/pci-hyperv.c ++++ b/drivers/pci/controller/pci-hyperv.c +@@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct ir + * Prevents hv_pci_onchannelcallback() from running concurrently + * in the tasklet. 
+ */ +- tasklet_disable(&channel->callback_event); ++ tasklet_disable_in_atomic(&channel->callback_event); + + /* + * Since this function is called with IRQ locks held, can't diff --git a/kernel/patches-5.11.x-rt/0067-0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch b/kernel/patches-5.11.x-rt/0067-0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch new file mode 100644 index 000000000..229bf67e7 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0067-0013-firewire-ohci-Use-tasklet_disable_in_atomic-where-re.patch @@ -0,0 +1,54 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 9 Mar 2021 09:42:16 +0100 +Subject: [PATCH 13/20] firewire: ohci: Use tasklet_disable_in_atomic() where + required + +tasklet_disable() is invoked in several places. Some of them are in atomic +context which prevents a conversion of tasklet_disable() to a sleepable +function. + +The atomic callchains are: + + ar_context_tasklet() + ohci_cancel_packet() + tasklet_disable() + + ... + ohci_flush_iso_completions() + tasklet_disable() + +The invocation of tasklet_disable() from at_context_flush() is always in +preemptible context. + +Use tasklet_disable_in_atomic() for the two invocations in +ohci_cancel_packet() and ohci_flush_iso_completions(). + +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Cc: Stefan Richter +Cc: linux1394-devel@lists.sourceforge.net +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/firewire/ohci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/firewire/ohci.c ++++ b/drivers/firewire/ohci.c +@@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_ + struct driver_data *driver_data = packet->driver_data; + int ret = -ENOENT; + +- tasklet_disable(&ctx->tasklet); ++ tasklet_disable_in_atomic(&ctx->tasklet); + + if (packet->ack != 0) + goto out; +@@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(st + struct iso_context *ctx = container_of(base, struct iso_context, base); + int ret = 0; + +- tasklet_disable(&ctx->context.tasklet); ++ tasklet_disable_in_atomic(&ctx->context.tasklet); + + if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) { + context_tasklet((unsigned long)&ctx->context); diff --git a/kernel/patches-5.11.x-rt/0068-0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch b/kernel/patches-5.11.x-rt/0068-0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch new file mode 100644 index 000000000..f9dd1b23e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0068-0014-tasklets-Switch-tasklet_disable-to-the-sleep-wait-va.patch @@ -0,0 +1,28 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:42:17 +0100 +Subject: [PATCH 14/20] tasklets: Switch tasklet_disable() to the sleep wait + variant + + -- NOT FOR IMMEDIATE MERGING -- + +Now that all users of tasklet_disable() are invoked from sleepable context, +convert it to use tasklet_unlock_wait() which might sleep. 
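+
+As a sketch of what this permits (the foo_* names are hypothetical,
+for illustration only):
+
+	/* Process context teardown; tasklet_disable() may now sleep
+	 * until a concurrently running tasklet has finished.
+	 */
+	static void foo_teardown(struct foo_priv *p)
+	{
+		tasklet_disable(&p->rx_tasklet);
+		foo_drain_rx_ring(p);	/* tasklet cannot run here */
+		tasklet_enable(&p->rx_tasklet);
+	}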
+
+Signed-off-by: Thomas Gleixner
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/interrupt.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -716,8 +716,7 @@ static inline void tasklet_disable_in_at
+ static inline void tasklet_disable(struct tasklet_struct *t)
+ {
+ 	tasklet_disable_nosync(t);
+-	/* Spin wait until all atomic users are converted */
+-	tasklet_unlock_spin_wait(t);
++	tasklet_unlock_wait(t);
+ 	smp_mb();
+ }
+ 
diff --git a/kernel/patches-5.11.x-rt/0069-0015-softirq-Add-RT-specific-softirq-accounting.patch b/kernel/patches-5.11.x-rt/0069-0015-softirq-Add-RT-specific-softirq-accounting.patch
new file mode 100644
index 000000000..d5ebf2bda
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0069-0015-softirq-Add-RT-specific-softirq-accounting.patch
@@ -0,0 +1,64 @@
+From: Thomas Gleixner
+Date: Tue, 9 Mar 2021 09:55:53 +0100
+Subject: [PATCH 15/20] softirq: Add RT specific softirq accounting
+
+RT requires the softirq processing and local bottom half disabled regions
+to be preemptible. Using the normal preempt count based serialization is
+therefore not possible because this implicitly disables preemption.
+
+RT kernels use a per CPU local lock to serialize bottom halves. As
+local_bh_disable() can nest, the lock can only be acquired on the outermost
+invocation of local_bh_disable() and released when the nest count becomes
+zero. Tasks which hold the local lock can be preempted, so it's required to
+keep track of the nest count per task.
+
+Add an RT-only counter to task_struct and adjust the relevant macros in
+preempt.h.
+
+Signed-off-by: Thomas Gleixner
+Tested-by: Sebastian Andrzej Siewior
+Reviewed-by: Frederic Weisbecker
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/hardirq.h |    1
+ include/linux/preempt.h |    6 +++++-
+ include/linux/sched.h   |    3 +++
+ 3 files changed, 9 insertions(+), 1 deletion(-)
+
+--- a/include/linux/hardirq.h
++++ b/include/linux/hardirq.h
+@@ -6,6 +6,7 @@
+ #include <linux/preempt.h>
+ #include <linux/lockdep.h>
+ #include <linux/ftrace_irq.h>
++#include <linux/sched.h>
+ #include <linux/vtime.h>
+ #include <asm/hardirq.h>
+ 
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -79,7 +79,11 @@
+ 
+ #define nmi_count()	(preempt_count() & NMI_MASK)
+ #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
+-#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
++#ifdef CONFIG_PREEMPT_RT
++# define softirq_count()	(current->softirq_disable_cnt & SOFTIRQ_MASK)
++#else
++# define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
++#endif
+ #define irq_count()	(nmi_count() | hardirq_count() | softirq_count())
+ 
+ /*
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1040,6 +1040,9 @@ struct task_struct {
+ 	int softirq_context;
+ 	int irq_config;
+ #endif
++#ifdef CONFIG_PREEMPT_RT
++	int softirq_disable_cnt;
++#endif
+ 
+ #ifdef CONFIG_LOCKDEP
+ # define MAX_LOCK_DEPTH 48UL
diff --git a/kernel/patches-5.11.x-rt/0070-0016-irqtime-Make-accounting-correct-on-RT.patch b/kernel/patches-5.11.x-rt/0070-0016-irqtime-Make-accounting-correct-on-RT.patch
new file mode 100644
index 000000000..1611182a9
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0070-0016-irqtime-Make-accounting-correct-on-RT.patch
@@ -0,0 +1,47 @@
+From: Thomas Gleixner
+Date: Tue, 9 Mar 2021 09:55:54 +0100
+Subject: [PATCH 16/20] irqtime: Make accounting correct on RT
+
+vtime_account_irq() and irqtime_account_irq() base their checks on
+preempt_count(), which fails on RT because preempt_count() does not
+contain the softirq accounting, which is separate on RT.
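+
+For reference, the mask layout involved (values from
+include/linux/preempt.h; shown for illustration, not part of the patch):
+
+	#define PREEMPT_MASK	0x000000ff
+	#define SOFTIRQ_MASK	0x0000ff00
+	#define HARDIRQ_MASK	0x000f0000
+	#define NMI_MASK	0x00f00000
+
+	/* irq_count() masks with NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK,
+	 * i.e. 0x00ffff00, and drops the preemption bits.
+	 */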
+ +These checks do not need the full preempt count as they only operate on the +hard and softirq sections. + +Use irq_count() instead which provides the correct value on both RT and non +RT kernels. The compiler is clever enough to fold the masking for !RT: + + 99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax + - 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax + + 9a2: 25 00 ff ff 00 and $0xffff00,%eax + +Reported-by: Sebastian Andrzej Siewior +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Reviewed-by: Frederic Weisbecker +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/sched/cputime.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/kernel/sched/cputime.c ++++ b/kernel/sched/cputime.c +@@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_str + cpu = smp_processor_id(); + delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; + irqtime->irq_start_time += delta; +- pc = preempt_count() - offset; ++ pc = irq_count() - offset; + + /* + * We do not account for softirq time from ksoftirqd here. +@@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struc + + void vtime_account_irq(struct task_struct *tsk, unsigned int offset) + { +- unsigned int pc = preempt_count() - offset; ++ unsigned int pc = irq_count() - offset; + + if (pc & HARDIRQ_OFFSET) { + vtime_account_hardirq(tsk); diff --git a/kernel/patches-5.11.x-rt/0071-0017-softirq-Move-various-protections-into-inline-helpers.patch b/kernel/patches-5.11.x-rt/0071-0017-softirq-Move-various-protections-into-inline-helpers.patch new file mode 100644 index 000000000..e88327856 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0071-0017-softirq-Move-various-protections-into-inline-helpers.patch @@ -0,0 +1,101 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:55:55 +0100 +Subject: [PATCH 17/20] softirq: Move various protections into inline helpers + +To allow reuse of the bulk of softirq processing code for RT and to avoid +#ifdeffery all over the place, split protections for various code sections +out into inline helpers so the RT variant can just replace them in one go. 
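+
+The pattern, roughly (the !RT bodies only; the hunk below has the full
+set):
+
+	static inline void ksoftirqd_run_begin(void)
+	{
+		local_irq_disable();
+	}
+
+	static inline void ksoftirqd_run_end(void)
+	{
+		local_irq_enable();
+	}
+
+Callers such as run_ksoftirqd() then use the helpers instead of
+open-coding the protection, so the RT series only has to supply
+different helper bodies.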
+ +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Reviewed-by: Frederic Weisbecker +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/softirq.c | 39 ++++++++++++++++++++++++++++++++------- + 1 file changed, 32 insertions(+), 7 deletions(-) + +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -205,6 +205,32 @@ void __local_bh_enable_ip(unsigned long + } + EXPORT_SYMBOL(__local_bh_enable_ip); + ++static inline void softirq_handle_begin(void) ++{ ++ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); ++} ++ ++static inline void softirq_handle_end(void) ++{ ++ __local_bh_enable(SOFTIRQ_OFFSET); ++ WARN_ON_ONCE(in_interrupt()); ++} ++ ++static inline void ksoftirqd_run_begin(void) ++{ ++ local_irq_disable(); ++} ++ ++static inline void ksoftirqd_run_end(void) ++{ ++ local_irq_enable(); ++} ++ ++static inline bool should_wake_ksoftirqd(void) ++{ ++ return true; ++} ++ + static inline void invoke_softirq(void) + { + if (ksoftirqd_running(local_softirq_pending())) +@@ -317,7 +343,7 @@ asmlinkage __visible void __softirq_entr + + pending = local_softirq_pending(); + +- __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); ++ softirq_handle_begin(); + in_hardirq = lockdep_softirq_start(); + account_softirq_enter(current); + +@@ -368,8 +394,7 @@ asmlinkage __visible void __softirq_entr + + account_softirq_exit(current); + lockdep_softirq_end(in_hardirq); +- __local_bh_enable(SOFTIRQ_OFFSET); +- WARN_ON_ONCE(in_interrupt()); ++ softirq_handle_end(); + current_restore_flags(old_flags, PF_MEMALLOC); + } + +@@ -464,7 +489,7 @@ inline void raise_softirq_irqoff(unsigne + * Otherwise we wake up ksoftirqd to make sure we + * schedule the softirq soon. + */ +- if (!in_interrupt()) ++ if (!in_interrupt() && should_wake_ksoftirqd()) + wakeup_softirqd(); + } + +@@ -692,18 +717,18 @@ static int ksoftirqd_should_run(unsigned + + static void run_ksoftirqd(unsigned int cpu) + { +- local_irq_disable(); ++ ksoftirqd_run_begin(); + if (local_softirq_pending()) { + /* + * We can safely run softirq on inline stack, as we are not deep + * in the task stack here. + */ + __do_softirq(); +- local_irq_enable(); ++ ksoftirqd_run_end(); + cond_resched(); + return; + } +- local_irq_enable(); ++ ksoftirqd_run_end(); + } + + #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/patches-5.11.x-rt/0072-0018-softirq-Make-softirq-control-and-processing-RT-aware.patch b/kernel/patches-5.11.x-rt/0072-0018-softirq-Make-softirq-control-and-processing-RT-aware.patch new file mode 100644 index 000000000..d548842a9 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0072-0018-softirq-Make-softirq-control-and-processing-RT-aware.patch @@ -0,0 +1,258 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:55:56 +0100 +Subject: [PATCH 18/20] softirq: Make softirq control and processing RT aware + +Provide a local lock based serialization for soft interrupts on RT which +allows the local_bh_disabled() sections and servicing soft interrupts to be +preemptible. + +Provide the necessary inline helpers which allow to reuse the bulk of the +softirq processing code. 
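+
+The core of the scheme, condensed from the hunk below:
+
+	struct softirq_ctrl {
+		local_lock_t	lock;	/* per CPU BH serialization */
+		int		cnt;	/* per CPU BH disable nest count */
+	};
+
+	/* The outermost local_bh_disable() acquires softirq_ctrl.lock (a
+	 * sleeping lock on RT); nested calls only adjust cnt, and the
+	 * lock is dropped when cnt returns to zero.
+	 */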
+ +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Reviewed-by: Frederic Weisbecker +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/bottom_half.h | 2 + kernel/softirq.c | 188 ++++++++++++++++++++++++++++++++++++++++++-- + 2 files changed, 182 insertions(+), 8 deletions(-) + +--- a/include/linux/bottom_half.h ++++ b/include/linux/bottom_half.h +@@ -4,7 +4,7 @@ + + #include + +-#ifdef CONFIG_TRACE_IRQFLAGS ++#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS) + extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); + #else + static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -101,20 +102,189 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_contex + #endif + + /* +- * preempt_count and SOFTIRQ_OFFSET usage: +- * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving +- * softirq processing. +- * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) ++ * SOFTIRQ_OFFSET usage: ++ * ++ * On !RT kernels 'count' is the preempt counter, on RT kernels this applies ++ * to a per CPU counter and to task::softirqs_disabled_cnt. ++ * ++ * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq ++ * processing. ++ * ++ * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET) + * on local_bh_disable or local_bh_enable. ++ * + * This lets us distinguish between whether we are currently processing + * softirq and whether we just have bh disabled. + */ ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and ++ * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a ++ * softirq disabled section to be preempted. ++ * ++ * The per task counter is used for softirq_count(), in_softirq() and ++ * in_serving_softirqs() because these counts are only valid when the task ++ * holding softirq_ctrl::lock is running. ++ * ++ * The per CPU counter prevents pointless wakeups of ksoftirqd in case that ++ * the task which is in a softirq disabled section is preempted or blocks. ++ */ ++struct softirq_ctrl { ++ local_lock_t lock; ++ int cnt; ++}; ++ ++static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = { ++ .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), ++}; ++ ++void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) ++{ ++ unsigned long flags; ++ int newcnt; ++ ++ WARN_ON_ONCE(in_hardirq()); ++ ++ /* First entry of a task into a BH disabled section? */ ++ if (!current->softirq_disable_cnt) { ++ if (preemptible()) { ++ local_lock(&softirq_ctrl.lock); ++ /* Required to meet the RCU bottomhalf requirements. */ ++ rcu_read_lock(); ++ } else { ++ DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt)); ++ } ++ } ++ ++ /* ++ * Track the per CPU softirq disabled state. On RT this is per CPU ++ * state to allow preemption of bottom half disabled sections. ++ */ ++ newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt); ++ /* ++ * Reflect the result in the task state to prevent recursion on the ++ * local lock and to make softirq_count() & al work. 
++ */ ++ current->softirq_disable_cnt = newcnt; ++ ++ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) { ++ raw_local_irq_save(flags); ++ lockdep_softirqs_off(ip); ++ raw_local_irq_restore(flags); ++ } ++} ++EXPORT_SYMBOL(__local_bh_disable_ip); ++ ++static void __local_bh_enable(unsigned int cnt, bool unlock) ++{ ++ unsigned long flags; ++ int newcnt; ++ ++ DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt != ++ this_cpu_read(softirq_ctrl.cnt)); ++ ++ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) { ++ raw_local_irq_save(flags); ++ lockdep_softirqs_on(_RET_IP_); ++ raw_local_irq_restore(flags); ++ } ++ ++ newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt); ++ current->softirq_disable_cnt = newcnt; ++ ++ if (!newcnt && unlock) { ++ rcu_read_unlock(); ++ local_unlock(&softirq_ctrl.lock); ++ } ++} ++ ++void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) ++{ ++ bool preempt_on = preemptible(); ++ unsigned long flags; ++ u32 pending; ++ int curcnt; ++ ++ WARN_ON_ONCE(in_irq()); ++ lockdep_assert_irqs_enabled(); ++ ++ local_irq_save(flags); ++ curcnt = __this_cpu_read(softirq_ctrl.cnt); ++ ++ /* ++ * If this is not reenabling soft interrupts, no point in trying to ++ * run pending ones. ++ */ ++ if (curcnt != cnt) ++ goto out; ++ ++ pending = local_softirq_pending(); ++ if (!pending || ksoftirqd_running(pending)) ++ goto out; ++ ++ /* ++ * If this was called from non preemptible context, wake up the ++ * softirq daemon. ++ */ ++ if (!preempt_on) { ++ wakeup_softirqd(); ++ goto out; ++ } ++ ++ /* ++ * Adjust softirq count to SOFTIRQ_OFFSET which makes ++ * in_serving_softirq() become true. ++ */ ++ cnt = SOFTIRQ_OFFSET; ++ __local_bh_enable(cnt, false); ++ __do_softirq(); ++ ++out: ++ __local_bh_enable(cnt, preempt_on); ++ local_irq_restore(flags); ++} ++EXPORT_SYMBOL(__local_bh_enable_ip); ++ ++/* ++ * Invoked from ksoftirqd_run() outside of the interrupt disabled section ++ * to acquire the per CPU local lock for reentrancy protection. ++ */ ++static inline void ksoftirqd_run_begin(void) ++{ ++ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); ++ local_irq_disable(); ++} ++ ++/* Counterpart to ksoftirqd_run_begin() */ ++static inline void ksoftirqd_run_end(void) ++{ ++ __local_bh_enable(SOFTIRQ_OFFSET, true); ++ WARN_ON_ONCE(in_interrupt()); ++ local_irq_enable(); ++} ++ ++static inline void softirq_handle_begin(void) { } ++static inline void softirq_handle_end(void) { } ++ ++static inline bool should_wake_ksoftirqd(void) ++{ ++ return !this_cpu_read(softirq_ctrl.cnt); ++} ++ ++static inline void invoke_softirq(void) ++{ ++ if (should_wake_ksoftirqd()) ++ wakeup_softirqd(); ++} ++ ++#else /* CONFIG_PREEMPT_RT */ + +-#ifdef CONFIG_TRACE_IRQFLAGS + /* +- * This is for softirq.c-internal use, where hardirqs are disabled ++ * This one is for softirq.c-internal use, where hardirqs are disabled + * legitimately: + */ ++#ifdef CONFIG_TRACE_IRQFLAGS + void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) + { + unsigned long flags; +@@ -275,6 +445,8 @@ asmlinkage __visible void do_softirq(voi + local_irq_restore(flags); + } + ++#endif /* !CONFIG_PREEMPT_RT */ ++ + /* + * We restart softirq processing for at most MAX_SOFTIRQ_RESTART times, + * but break the loop if need_resched() is set or after 2 ms. 
+@@ -379,8 +551,10 @@ asmlinkage __visible void __softirq_entr + pending >>= softirq_bit; + } + +- if (__this_cpu_read(ksoftirqd) == current) ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ++ __this_cpu_read(ksoftirqd) == current) + rcu_softirq_qs(); ++ + local_irq_disable(); + + pending = local_softirq_pending(); diff --git a/kernel/patches-5.11.x-rt/0073-0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch b/kernel/patches-5.11.x-rt/0073-0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch new file mode 100644 index 000000000..3d3a12d23 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0073-0019-tick-sched-Prevent-false-positive-softirq-pending-wa.patch @@ -0,0 +1,73 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:55:57 +0100 +Subject: [PATCH 19/20] tick/sched: Prevent false positive softirq pending + warnings on RT + +On RT a task which has soft interrupts disabled can block on a lock and +schedule out to idle while soft interrupts are pending. This triggers the +warning in the NOHZ idle code which complains about going idle with pending +soft interrupts. But as the task is blocked soft interrupt processing is +temporarily blocked as well which means that such a warning is a false +positive. + +To prevent that check the per CPU state which indicates that a scheduled +out task has soft interrupts disabled. + +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Reviewed-by: Frederic Weisbecker +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/bottom_half.h | 6 ++++++ + kernel/softirq.c | 15 +++++++++++++++ + kernel/time/tick-sched.c | 2 +- + 3 files changed, 22 insertions(+), 1 deletion(-) + +--- a/include/linux/bottom_half.h ++++ b/include/linux/bottom_half.h +@@ -32,4 +32,10 @@ static inline void local_bh_enable(void) + __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); + } + ++#ifdef CONFIG_PREEMPT_RT ++extern bool local_bh_blocked(void); ++#else ++static inline bool local_bh_blocked(void) { return false; } ++#endif ++ + #endif /* _LINUX_BH_H */ +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -139,6 +139,21 @@ static DEFINE_PER_CPU(struct softirq_ctr + .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock), + }; + ++/** ++ * local_bh_blocked() - Check for idle whether BH processing is blocked ++ * ++ * Returns false if the per CPU softirq::cnt is 0 otherwise true. ++ * ++ * This is invoked from the idle task to guard against false positive ++ * softirq pending warnings, which would happen when the task which holds ++ * softirq_ctrl::lock was the only running task on the CPU and blocks on ++ * some other lock. 
++ */ ++bool local_bh_blocked(void) ++{ ++ return __this_cpu_read(softirq_ctrl.cnt) != 0; ++} ++ + void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) + { + unsigned long flags; +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu, + if (unlikely(local_softirq_pending())) { + static int ratelimit; + +- if (ratelimit < 10 && ++ if (ratelimit < 10 && !local_bh_blocked() && + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { + pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n", + (unsigned int) local_softirq_pending()); diff --git a/kernel/patches-5.11.x-rt/0074-0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch b/kernel/patches-5.11.x-rt/0074-0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch new file mode 100644 index 000000000..13f42f6f5 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0074-0020-rcu-Prevent-false-positive-softirq-warning-on-RT.patch @@ -0,0 +1,28 @@ +From: Thomas Gleixner +Date: Tue, 9 Mar 2021 09:55:58 +0100 +Subject: [PATCH 20/20] rcu: Prevent false positive softirq warning on RT + +Soft interrupt disabled sections can legitimately be preempted or schedule +out when blocking on a lock on RT enabled kernels so the RCU preempt check +warning has to be disabled for RT kernels. + +Signed-off-by: Thomas Gleixner +Tested-by: Sebastian Andrzej Siewior +Reviewed-by: Paul E. McKenney +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rcupdate.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/include/linux/rcupdate.h ++++ b/include/linux/rcupdate.h +@@ -328,7 +328,8 @@ static inline void rcu_preempt_sleep_che + #define rcu_sleep_check() \ + do { \ + rcu_preempt_sleep_check(); \ +- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \ ++ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh read-side critical section"); \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched read-side critical section"); \ diff --git a/kernel/patches-5.11.x-rt/0075-0001-chelsio-cxgb-Replace-the-workqueue-with-threaded-int.patch b/kernel/patches-5.11.x-rt/0075-0001-chelsio-cxgb-Replace-the-workqueue-with-threaded-int.patch new file mode 100644 index 000000000..20c8e4962 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0075-0001-chelsio-cxgb-Replace-the-workqueue-with-threaded-int.patch @@ -0,0 +1,256 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 2 Feb 2021 18:01:03 +0100 +Subject: [PATCH 1/2] chelsio: cxgb: Replace the workqueue with threaded + interrupt + +The external interrupt (F_PL_INTR_EXT) needs to be handled in a process +context and this is accomplished by utilizing a workqueue. + +The process context can also be provided by a threaded interrupt instead +of a workqueue. The threaded interrupt can be used later for other +interrupt related processing which require non-atomic context without +using yet another workqueue. free_irq() also ensures that the thread is +done which is currently missing (the worker could continue after the +module has been removed). + +Save pending flags in pending_thread_intr. Use the same mechanism +to disable F_PL_INTR_EXT as interrupt source like it is used before the +worker is scheduled. Enable the interrupt again once +t1_elmer0_ext_intr_handler() is done. 
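+
+The shape of the conversion (condensed from the hunks below):
+
+	err = request_threaded_irq(adapter->pdev->irq, t1_interrupt,
+				   t1_interrupt_thread,
+				   adapter->params.has_msi ? 0 : IRQF_SHARED,
+				   adapter->name, adapter);
+
+	/* hard interrupt part: record the work, mask the source and
+	 * let the interrupt core wake the thread:
+	 */
+	adapter->pending_thread_intr |= F_PL_INTR_EXT;
+	adapter->slow_intr_mask &= ~F_PL_INTR_EXT;
+	return IRQ_WAKE_THREAD;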
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/chelsio/cxgb/common.h | 5 +-- + drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 44 ++--------------------------- + drivers/net/ethernet/chelsio/cxgb/sge.c | 33 +++++++++++++++++++-- + drivers/net/ethernet/chelsio/cxgb/sge.h | 1 + drivers/net/ethernet/chelsio/cxgb/subr.c | 26 +++++++++++------ + 5 files changed, 55 insertions(+), 54 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb/common.h ++++ b/drivers/net/ethernet/chelsio/cxgb/common.h +@@ -238,7 +238,6 @@ struct adapter { + int msg_enable; + u32 mmio_len; + +- struct work_struct ext_intr_handler_task; + struct adapter_params params; + + /* Terminator modules. */ +@@ -257,6 +256,7 @@ struct adapter { + + /* guards async operations */ + spinlock_t async_lock ____cacheline_aligned; ++ u32 pending_thread_intr; + u32 slow_intr_mask; + int t1powersave; + }; +@@ -334,8 +334,7 @@ void t1_interrupts_enable(adapter_t *ada + void t1_interrupts_disable(adapter_t *adapter); + void t1_interrupts_clear(adapter_t *adapter); + int t1_elmer0_ext_intr_handler(adapter_t *adapter); +-void t1_elmer0_ext_intr(adapter_t *adapter); +-int t1_slow_intr_handler(adapter_t *adapter); ++irqreturn_t t1_slow_intr_handler(adapter_t *adapter); + + int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); + const struct board_info *t1_get_board_info(unsigned int board_id); +--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c ++++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +@@ -211,9 +211,10 @@ static int cxgb_up(struct adapter *adapt + t1_interrupts_clear(adapter); + + adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev); +- err = request_irq(adapter->pdev->irq, t1_interrupt, +- adapter->params.has_msi ? 0 : IRQF_SHARED, +- adapter->name, adapter); ++ err = request_threaded_irq(adapter->pdev->irq, t1_interrupt, ++ t1_interrupt_thread, ++ adapter->params.has_msi ? 0 : IRQF_SHARED, ++ adapter->name, adapter); + if (err) { + if (adapter->params.has_msi) + pci_disable_msi(adapter->pdev); +@@ -916,41 +917,6 @@ static void mac_stats_task(struct work_s + spin_unlock(&adapter->work_lock); + } + +-/* +- * Processes elmer0 external interrupts in process context. +- */ +-static void ext_intr_task(struct work_struct *work) +-{ +- struct adapter *adapter = +- container_of(work, struct adapter, ext_intr_handler_task); +- +- t1_elmer0_ext_intr_handler(adapter); +- +- /* Now reenable external interrupts */ +- spin_lock_irq(&adapter->async_lock); +- adapter->slow_intr_mask |= F_PL_INTR_EXT; +- writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); +- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, +- adapter->regs + A_PL_ENABLE); +- spin_unlock_irq(&adapter->async_lock); +-} +- +-/* +- * Interrupt-context handler for elmer0 external interrupts. +- */ +-void t1_elmer0_ext_intr(struct adapter *adapter) +-{ +- /* +- * Schedule a task to handle external interrupts as we require +- * a process context. We disable EXT interrupts in the interim +- * and let the task reenable them when it's done. 
+- */ +- adapter->slow_intr_mask &= ~F_PL_INTR_EXT; +- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, +- adapter->regs + A_PL_ENABLE); +- schedule_work(&adapter->ext_intr_handler_task); +-} +- + void t1_fatal_err(struct adapter *adapter) + { + if (adapter->flags & FULL_INIT_DONE) { +@@ -1062,8 +1028,6 @@ static int init_one(struct pci_dev *pdev + spin_lock_init(&adapter->async_lock); + spin_lock_init(&adapter->mac_lock); + +- INIT_WORK(&adapter->ext_intr_handler_task, +- ext_intr_task); + INIT_DELAYED_WORK(&adapter->stats_update_task, + mac_stats_task); + +--- a/drivers/net/ethernet/chelsio/cxgb/sge.c ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.c +@@ -1619,11 +1619,38 @@ int t1_poll(struct napi_struct *napi, in + return work_done; + } + ++irqreturn_t t1_interrupt_thread(int irq, void *data) ++{ ++ struct adapter *adapter = data; ++ u32 pending_thread_intr; ++ ++ spin_lock_irq(&adapter->async_lock); ++ pending_thread_intr = adapter->pending_thread_intr; ++ adapter->pending_thread_intr = 0; ++ spin_unlock_irq(&adapter->async_lock); ++ ++ if (!pending_thread_intr) ++ return IRQ_NONE; ++ ++ if (pending_thread_intr & F_PL_INTR_EXT) ++ t1_elmer0_ext_intr_handler(adapter); ++ ++ spin_lock_irq(&adapter->async_lock); ++ adapter->slow_intr_mask |= F_PL_INTR_EXT; ++ ++ writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); ++ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, ++ adapter->regs + A_PL_ENABLE); ++ spin_unlock_irq(&adapter->async_lock); ++ ++ return IRQ_HANDLED; ++} ++ + irqreturn_t t1_interrupt(int irq, void *data) + { + struct adapter *adapter = data; + struct sge *sge = adapter->sge; +- int handled; ++ irqreturn_t handled; + + if (likely(responses_pending(adapter))) { + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); +@@ -1645,10 +1672,10 @@ irqreturn_t t1_interrupt(int irq, void * + handled = t1_slow_intr_handler(adapter); + spin_unlock(&adapter->async_lock); + +- if (!handled) ++ if (handled == IRQ_NONE) + sge->stats.unhandled_irqs++; + +- return IRQ_RETVAL(handled != 0); ++ return handled; + } + + /* +--- a/drivers/net/ethernet/chelsio/cxgb/sge.h ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.h +@@ -74,6 +74,7 @@ struct sge *t1_sge_create(struct adapter + int t1_sge_configure(struct sge *, struct sge_params *); + int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); + void t1_sge_destroy(struct sge *); ++irqreturn_t t1_interrupt_thread(int irq, void *data); + irqreturn_t t1_interrupt(int irq, void *cookie); + int t1_poll(struct napi_struct *, int); + +--- a/drivers/net/ethernet/chelsio/cxgb/subr.c ++++ b/drivers/net/ethernet/chelsio/cxgb/subr.c +@@ -210,7 +210,7 @@ static int fpga_phy_intr_handler(adapter + /* + * Slow path interrupt handler for FPGAs. + */ +-static int fpga_slow_intr(adapter_t *adapter) ++static irqreturn_t fpga_slow_intr(adapter_t *adapter) + { + u32 cause = readl(adapter->regs + A_PL_CAUSE); + +@@ -238,7 +238,7 @@ static int fpga_slow_intr(adapter_t *ada + if (cause) + writel(cause, adapter->regs + A_PL_CAUSE); + +- return cause != 0; ++ return cause == 0 ? IRQ_NONE : IRQ_HANDLED; + } + #endif + +@@ -842,13 +842,14 @@ void t1_interrupts_clear(adapter_t* adap + /* + * Slow path interrupt handler for ASICs. 
+ */ +-static int asic_slow_intr(adapter_t *adapter) ++static irqreturn_t asic_slow_intr(adapter_t *adapter) + { + u32 cause = readl(adapter->regs + A_PL_CAUSE); ++ irqreturn_t ret = IRQ_HANDLED; + + cause &= adapter->slow_intr_mask; + if (!cause) +- return 0; ++ return IRQ_NONE; + if (cause & F_PL_INTR_SGE_ERR) + t1_sge_intr_error_handler(adapter->sge); + if (cause & F_PL_INTR_TP) +@@ -857,16 +858,25 @@ static int asic_slow_intr(adapter_t *ada + t1_espi_intr_handler(adapter->espi); + if (cause & F_PL_INTR_PCIX) + t1_pci_intr_handler(adapter); +- if (cause & F_PL_INTR_EXT) +- t1_elmer0_ext_intr(adapter); ++ if (cause & F_PL_INTR_EXT) { ++ /* Wake the threaded interrupt to handle external interrupts as ++ * we require a process context. We disable EXT interrupts in ++ * the interim and let the thread reenable them when it's done. ++ */ ++ adapter->pending_thread_intr |= F_PL_INTR_EXT; ++ adapter->slow_intr_mask &= ~F_PL_INTR_EXT; ++ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, ++ adapter->regs + A_PL_ENABLE); ++ ret = IRQ_WAKE_THREAD; ++ } + + /* Clear the interrupts just processed. */ + writel(cause, adapter->regs + A_PL_CAUSE); + readl(adapter->regs + A_PL_CAUSE); /* flush writes */ +- return 1; ++ return ret; + } + +-int t1_slow_intr_handler(adapter_t *adapter) ++irqreturn_t t1_slow_intr_handler(adapter_t *adapter) + { + #ifdef CONFIG_CHELSIO_T1_1G + if (!t1_is_asic(adapter)) diff --git a/kernel/patches-5.11.x-rt/0076-0002-chelsio-cxgb-Disable-the-card-on-error-in-threaded-i.patch b/kernel/patches-5.11.x-rt/0076-0002-chelsio-cxgb-Disable-the-card-on-error-in-threaded-i.patch new file mode 100644 index 000000000..453d8105e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0076-0002-chelsio-cxgb-Disable-the-card-on-error-in-threaded-i.patch @@ -0,0 +1,200 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 2 Feb 2021 18:01:04 +0100 +Subject: [PATCH 2/2] chelsio: cxgb: Disable the card on error in threaded + interrupt + +t1_fatal_err() is invoked from the interrupt handler. The bad part is +that it invokes (via t1_sge_stop()) del_timer_sync() and tasklet_kill(). +Both functions must not be called from an interrupt because it is +possible that it will wait for the completion of the timer/tasklet it +just interrupted. + +In case of a fatal error, use t1_interrupts_disable() to disable all +interrupt sources and then wake the interrupt thread with +F_PL_INTR_SGE_ERR as pending flag. The threaded-interrupt will stop the +card via t1_sge_stop() and not re-enable the interrupts again. 
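+
+Condensed, the fatal path becomes (sketch of the hunks below):
+
+	/* hard interrupt: quiesce the hardware, defer to the thread */
+	t1_interrupts_disable(adapter);
+	adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR;
+	return IRQ_WAKE_THREAD;
+
+	/* thread: may sleep in del_timer_sync()/tasklet_kill() */
+	t1_sge_stop(adapter->sge);
+	/* interrupts deliberately stay disabled */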
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + drivers/net/ethernet/chelsio/cxgb/common.h | 1 + drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 10 ------- + drivers/net/ethernet/chelsio/cxgb/sge.c | 20 ++++++++++++--- + drivers/net/ethernet/chelsio/cxgb/sge.h | 2 - + drivers/net/ethernet/chelsio/cxgb/subr.c | 38 ++++++++++++++++++++--------- + 5 files changed, 44 insertions(+), 27 deletions(-) + +--- a/drivers/net/ethernet/chelsio/cxgb/common.h ++++ b/drivers/net/ethernet/chelsio/cxgb/common.h +@@ -346,7 +346,6 @@ int t1_get_board_rev(adapter_t *adapter, + int t1_init_hw_modules(adapter_t *adapter); + int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); + void t1_free_sw_modules(adapter_t *adapter); +-void t1_fatal_err(adapter_t *adapter); + void t1_link_changed(adapter_t *adapter, int port_id); + void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat, + int speed, int duplex, int pause); +--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c ++++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +@@ -917,16 +917,6 @@ static void mac_stats_task(struct work_s + spin_unlock(&adapter->work_lock); + } + +-void t1_fatal_err(struct adapter *adapter) +-{ +- if (adapter->flags & FULL_INIT_DONE) { +- t1_sge_stop(adapter->sge); +- t1_interrupts_disable(adapter); +- } +- pr_alert("%s: encountered fatal error, operation suspended\n", +- adapter->name); +-} +- + static const struct net_device_ops cxgb_netdev_ops = { + .ndo_open = cxgb_open, + .ndo_stop = cxgb_close, +--- a/drivers/net/ethernet/chelsio/cxgb/sge.c ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.c +@@ -940,10 +940,11 @@ void t1_sge_intr_clear(struct sge *sge) + /* + * SGE 'Error' interrupt handler + */ +-int t1_sge_intr_error_handler(struct sge *sge) ++bool t1_sge_intr_error_handler(struct sge *sge) + { + struct adapter *adapter = sge->adapter; + u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); ++ bool wake = false; + + if (adapter->port[0].dev->hw_features & NETIF_F_TSO) + cause &= ~F_PACKET_TOO_BIG; +@@ -967,11 +968,14 @@ int t1_sge_intr_error_handler(struct sge + sge->stats.pkt_mismatch++; + pr_alert("%s: SGE packet mismatch\n", adapter->name); + } +- if (cause & SGE_INT_FATAL) +- t1_fatal_err(adapter); ++ if (cause & SGE_INT_FATAL) { ++ t1_interrupts_disable(adapter); ++ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; ++ wake = true; ++ } + + writel(cause, adapter->regs + A_SG_INT_CAUSE); +- return 0; ++ return wake; + } + + const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge) +@@ -1635,6 +1639,14 @@ irqreturn_t t1_interrupt_thread(int irq, + if (pending_thread_intr & F_PL_INTR_EXT) + t1_elmer0_ext_intr_handler(adapter); + ++ /* This error is fatal, interrupts remain off */ ++ if (pending_thread_intr & F_PL_INTR_SGE_ERR) { ++ pr_alert("%s: encountered fatal error, operation suspended\n", ++ adapter->name); ++ t1_sge_stop(adapter->sge); ++ return IRQ_HANDLED; ++ } ++ + spin_lock_irq(&adapter->async_lock); + adapter->slow_intr_mask |= F_PL_INTR_EXT; + +--- a/drivers/net/ethernet/chelsio/cxgb/sge.h ++++ b/drivers/net/ethernet/chelsio/cxgb/sge.h +@@ -82,7 +82,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff + void t1_vlan_mode(struct adapter *adapter, netdev_features_t features); + void t1_sge_start(struct sge *); + void t1_sge_stop(struct sge *); +-int t1_sge_intr_error_handler(struct sge *); ++bool t1_sge_intr_error_handler(struct sge *sge); + void t1_sge_intr_enable(struct sge *); + void t1_sge_intr_disable(struct sge *); + void t1_sge_intr_clear(struct sge *); +--- 
a/drivers/net/ethernet/chelsio/cxgb/subr.c ++++ b/drivers/net/ethernet/chelsio/cxgb/subr.c +@@ -170,7 +170,7 @@ void t1_link_changed(adapter_t *adapter, + t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc); + } + +-static int t1_pci_intr_handler(adapter_t *adapter) ++static bool t1_pci_intr_handler(adapter_t *adapter) + { + u32 pcix_cause; + +@@ -179,9 +179,13 @@ static int t1_pci_intr_handler(adapter_t + if (pcix_cause) { + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, + pcix_cause); +- t1_fatal_err(adapter); /* PCI errors are fatal */ ++ /* PCI errors are fatal */ ++ t1_interrupts_disable(adapter); ++ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; ++ pr_alert("%s: PCI error encountered.\n", adapter->name); ++ return true; + } +- return 0; ++ return false; + } + + #ifdef CONFIG_CHELSIO_T1_1G +@@ -213,10 +217,13 @@ static int fpga_phy_intr_handler(adapter + static irqreturn_t fpga_slow_intr(adapter_t *adapter) + { + u32 cause = readl(adapter->regs + A_PL_CAUSE); ++ irqreturn_t ret = IRQ_NONE; + + cause &= ~F_PL_INTR_SGE_DATA; +- if (cause & F_PL_INTR_SGE_ERR) +- t1_sge_intr_error_handler(adapter->sge); ++ if (cause & F_PL_INTR_SGE_ERR) { ++ if (t1_sge_intr_error_handler(adapter->sge)) ++ ret = IRQ_WAKE_THREAD; ++ } + + if (cause & FPGA_PCIX_INTERRUPT_GMAC) + fpga_phy_intr_handler(adapter); +@@ -231,13 +238,18 @@ static irqreturn_t fpga_slow_intr(adapte + /* Clear TP interrupt */ + writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE); + } +- if (cause & FPGA_PCIX_INTERRUPT_PCIX) +- t1_pci_intr_handler(adapter); ++ if (cause & FPGA_PCIX_INTERRUPT_PCIX) { ++ if (t1_pci_intr_handler(adapter)) ++ ret = IRQ_WAKE_THREAD; ++ } + + /* Clear the interrupts just processed. */ + if (cause) + writel(cause, adapter->regs + A_PL_CAUSE); + ++ if (ret != IRQ_NONE) ++ return ret; ++ + return cause == 0 ? IRQ_NONE : IRQ_HANDLED; + } + #endif +@@ -850,14 +862,18 @@ static irqreturn_t asic_slow_intr(adapte + cause &= adapter->slow_intr_mask; + if (!cause) + return IRQ_NONE; +- if (cause & F_PL_INTR_SGE_ERR) +- t1_sge_intr_error_handler(adapter->sge); ++ if (cause & F_PL_INTR_SGE_ERR) { ++ if (t1_sge_intr_error_handler(adapter->sge)) ++ ret = IRQ_WAKE_THREAD; ++ } + if (cause & F_PL_INTR_TP) + t1_tp_intr_handler(adapter->tp); + if (cause & F_PL_INTR_ESPI) + t1_espi_intr_handler(adapter->espi); +- if (cause & F_PL_INTR_PCIX) +- t1_pci_intr_handler(adapter); ++ if (cause & F_PL_INTR_PCIX) { ++ if (t1_pci_intr_handler(adapter)) ++ ret = IRQ_WAKE_THREAD; ++ } + if (cause & F_PL_INTR_EXT) { + /* Wake the threaded interrupt to handle external interrupts as + * we require a process context. We disable EXT interrupts in diff --git a/kernel/patches-5.11.x-rt/0077-0001-locking-rtmutex-Remove-cruft.patch b/kernel/patches-5.11.x-rt/0077-0001-locking-rtmutex-Remove-cruft.patch new file mode 100644 index 000000000..7e9c2e00a --- /dev/null +++ b/kernel/patches-5.11.x-rt/0077-0001-locking-rtmutex-Remove-cruft.patch @@ -0,0 +1,86 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 29 Sep 2020 15:21:17 +0200 +Subject: [PATCH 01/22] locking/rtmutex: Remove cruft + +Most of this is around since the very beginning. I'm not sure if this +was used while the rtmutex-deadlock-tester was around but today it seems +to only waste memory: +- save_state: No users +- name: Assigned and printed if a dead lock was detected. I'm keeping it + but want to point out that lockdep has the same information. +- file + line: Printed if ::name was NULL. 
This is only used for
+  in-kernel locks, so ::name shouldn't be NULL, and then ::file and
+  ::line aren't used.
+- magic: Assigned to NULL by rt_mutex_destroy().
+
+Remove members of rt_mutex which are not used.
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/rtmutex.h | 7 ++-----
+ kernel/locking/rtmutex-debug.c | 7 +------
+ kernel/locking/rtmutex.c | 3 ---
+ kernel/locking/rtmutex_common.h | 1 -
+ 4 files changed, 3 insertions(+), 15 deletions(-)
+
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -32,10 +32,7 @@ struct rt_mutex {
+ 	struct rb_root_cached waiters;
+ 	struct task_struct *owner;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+-	int save_state;
+-	const char *name, *file;
+-	int line;
+-	void *magic;
++	const char *name;
+ #endif
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+ 	struct lockdep_map dep_map;
+@@ -60,7 +57,7 @@ struct hrtimer_sleeper;
+ 
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+ # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
+-	, .name = #mutexname, .file = __FILE__, .line = __LINE__
++	, .name = #mutexname
+ 
+ # define rt_mutex_init(mutex) \
+ do { \
+--- a/kernel/locking/rtmutex-debug.c
++++ b/kernel/locking/rtmutex-debug.c
+@@ -42,12 +42,7 @@ static void printk_task(struct task_stru
+ 
+ static void printk_lock(struct rt_mutex *lock, int print_owner)
+ {
+-	if (lock->name)
+-		printk(" [%p] {%s}\n",
+-			lock, lock->name);
+-	else
+-		printk(" [%p] {%s:%d}\n",
+-			lock, lock->file, lock->line);
++	printk(" [%p] {%s}\n", lock, lock->name);
+ 
+ 	if (print_owner && rt_mutex_owner(lock)) {
+ 		printk(".. ->owner: %p\n", lock->owner);
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -1655,9 +1655,6 @@ void __sched rt_mutex_futex_unlock(struc
+ void rt_mutex_destroy(struct rt_mutex *lock)
+ {
+ 	WARN_ON(rt_mutex_is_locked(lock));
+-#ifdef CONFIG_DEBUG_RT_MUTEXES
+-	lock->magic = NULL;
+-#endif
+ }
+ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
+ 
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -30,7 +30,6 @@ struct rt_mutex_waiter {
+ 	struct task_struct *task;
+ 	struct rt_mutex *lock;
+ #ifdef CONFIG_DEBUG_RT_MUTEXES
+-	unsigned long ip;
+ 	struct pid *deadlock_task_pid;
+ 	struct rt_mutex *deadlock_lock;
+ #endif
diff --git a/kernel/patches-5.11.x-rt/0078-0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch b/kernel/patches-5.11.x-rt/0078-0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch
new file mode 100644
index 000000000..72e8df0d4
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0078-0002-locking-rtmutex-Remove-output-from-deadlock-detector.patch
@@ -0,0 +1,294 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 29 Sep 2020 16:05:11 +0200
+Subject: [PATCH 02/22] locking/rtmutex: Remove output from deadlock detector.
+
+In commit
+   f5694788ad8da ("rt_mutex: Add lockdep annotations")
+
+rtmutex gained lockdep annotation for rt_mutex_lock() and related
+functions.
+lockdep will see the locking order and may complain about a deadlock
+before rtmutex' own mechanism gets a chance to detect it.
+The rtmutex deadlock detector will only complain about locks with
+RT_MUTEX_MIN_CHAINWALK, and only while a waiter is pending. That means it
+works only for in-kernel locks because the futex interface always uses
+RT_MUTEX_FULL_CHAINWALK.
+The requirement for an active waiter limits the detector to actual
+deadlocks and makes it impossible to report potential deadlocks like
+lockdep does.
+It looks like lockdep is better suited for reporting deadlocks.
+
+Remove rtmutex' debug print on deadlock detection.
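+
+For illustration, lockdep flags the classic inversion below at
+acquisition time, without requiring an actual deadlock to occur:
+
+	/* task 1 */			/* task 2 */
+	rt_mutex_lock(&lock_a);		rt_mutex_lock(&lock_b);
+	rt_mutex_lock(&lock_b);		rt_mutex_lock(&lock_a);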
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rtmutex.h | 7 -- + kernel/locking/rtmutex-debug.c | 97 ---------------------------------------- + kernel/locking/rtmutex-debug.h | 11 ---- + kernel/locking/rtmutex.c | 9 --- + kernel/locking/rtmutex.h | 7 -- + kernel/locking/rtmutex_common.h | 4 - + 6 files changed, 135 deletions(-) + +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -31,9 +31,6 @@ struct rt_mutex { + raw_spinlock_t wait_lock; + struct rb_root_cached waiters; + struct task_struct *owner; +-#ifdef CONFIG_DEBUG_RT_MUTEXES +- const char *name; +-#endif + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +@@ -56,8 +53,6 @@ struct hrtimer_sleeper; + #endif + + #ifdef CONFIG_DEBUG_RT_MUTEXES +-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ +- , .name = #mutexname + + # define rt_mutex_init(mutex) \ + do { \ +@@ -67,7 +62,6 @@ do { \ + + extern void rt_mutex_debug_task_free(struct task_struct *tsk); + #else +-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) + # define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) + # define rt_mutex_debug_task_free(t) do { } while (0) + #endif +@@ -83,7 +77,6 @@ do { \ + { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ + , .waiters = RB_ROOT_CACHED \ + , .owner = NULL \ +- __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ + __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} + + #define DEFINE_RT_MUTEX(mutexname) \ +--- a/kernel/locking/rtmutex-debug.c ++++ b/kernel/locking/rtmutex-debug.c +@@ -32,105 +32,12 @@ + + #include "rtmutex_common.h" + +-static void printk_task(struct task_struct *p) +-{ +- if (p) +- printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio); +- else +- printk(""); +-} +- +-static void printk_lock(struct rt_mutex *lock, int print_owner) +-{ +- printk(" [%p] {%s}\n", lock, lock->name); +- +- if (print_owner && rt_mutex_owner(lock)) { +- printk(".. ->owner: %p\n", lock->owner); +- printk(".. held by: "); +- printk_task(rt_mutex_owner(lock)); +- printk("\n"); +- } +-} +- + void rt_mutex_debug_task_free(struct task_struct *task) + { + DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); + DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); + } + +-/* +- * We fill out the fields in the waiter to store the information about +- * the deadlock. We print when we return. act_waiter can be NULL in +- * case of a remove waiter operation. 
+- */ +-void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, +- struct rt_mutex_waiter *act_waiter, +- struct rt_mutex *lock) +-{ +- struct task_struct *task; +- +- if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter) +- return; +- +- task = rt_mutex_owner(act_waiter->lock); +- if (task && task != current) { +- act_waiter->deadlock_task_pid = get_pid(task_pid(task)); +- act_waiter->deadlock_lock = lock; +- } +-} +- +-void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) +-{ +- struct task_struct *task; +- +- if (!waiter->deadlock_lock || !debug_locks) +- return; +- +- rcu_read_lock(); +- task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID); +- if (!task) { +- rcu_read_unlock(); +- return; +- } +- +- if (!debug_locks_off()) { +- rcu_read_unlock(); +- return; +- } +- +- pr_warn("\n"); +- pr_warn("============================================\n"); +- pr_warn("WARNING: circular locking deadlock detected!\n"); +- pr_warn("%s\n", print_tainted()); +- pr_warn("--------------------------------------------\n"); +- printk("%s/%d is deadlocking current task %s/%d\n\n", +- task->comm, task_pid_nr(task), +- current->comm, task_pid_nr(current)); +- +- printk("\n1) %s/%d is trying to acquire this lock:\n", +- current->comm, task_pid_nr(current)); +- printk_lock(waiter->lock, 1); +- +- printk("\n2) %s/%d is blocked on this lock:\n", +- task->comm, task_pid_nr(task)); +- printk_lock(waiter->deadlock_lock, 1); +- +- debug_show_held_locks(current); +- debug_show_held_locks(task); +- +- printk("\n%s/%d's [blocked] stackdump:\n\n", +- task->comm, task_pid_nr(task)); +- show_stack(task, NULL, KERN_DEFAULT); +- printk("\n%s/%d's [current] stackdump:\n\n", +- current->comm, task_pid_nr(current)); +- dump_stack(); +- debug_show_all_locks(); +- rcu_read_unlock(); +- +- printk("[ turning off deadlock detection." +- "Please report this trace. 
]\n\n"); +-} +- + void debug_rt_mutex_lock(struct rt_mutex *lock) + { + } +@@ -153,12 +60,10 @@ void debug_rt_mutex_proxy_unlock(struct + void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) + { + memset(waiter, 0x11, sizeof(*waiter)); +- waiter->deadlock_task_pid = NULL; + } + + void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) + { +- put_pid(waiter->deadlock_task_pid); + memset(waiter, 0x22, sizeof(*waiter)); + } + +@@ -168,10 +73,8 @@ void debug_rt_mutex_init(struct rt_mutex + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); +- lock->name = name; + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + lockdep_init_map(&lock->dep_map, name, key, 0); + #endif + } +- +--- a/kernel/locking/rtmutex-debug.h ++++ b/kernel/locking/rtmutex-debug.h +@@ -18,20 +18,9 @@ extern void debug_rt_mutex_unlock(struct + extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, + struct task_struct *powner); + extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); +-extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, +- struct rt_mutex_waiter *waiter, +- struct rt_mutex *lock); +-extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); +-# define debug_rt_mutex_reset_waiter(w) \ +- do { (w)->deadlock_lock = NULL; } while (0) + + static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, + enum rtmutex_chainwalk walk) + { + return (waiter != NULL); + } +- +-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) +-{ +- debug_rt_mutex_print_deadlock(w); +-} +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -597,7 +597,6 @@ static int rt_mutex_adjust_prio_chain(st + * walk, we detected a deadlock. + */ + if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { +- debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); + raw_spin_unlock(&lock->wait_lock); + ret = -EDEADLK; + goto out_unlock_pi; +@@ -1189,8 +1188,6 @@ static int __sched + + raw_spin_unlock_irq(&lock->wait_lock); + +- debug_rt_mutex_print_deadlock(waiter); +- + schedule(); + + raw_spin_lock_irq(&lock->wait_lock); +@@ -1211,10 +1208,6 @@ static void rt_mutex_handle_deadlock(int + if (res != -EDEADLOCK || detect_deadlock) + return; + +- /* +- * Yell lowdly and stop the task right here. 
+- */ +- rt_mutex_print_deadlock(w); + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); +@@ -1763,8 +1756,6 @@ int __rt_mutex_start_proxy_lock(struct r + ret = 0; + } + +- debug_rt_mutex_print_deadlock(waiter); +- + return ret; + } + +--- a/kernel/locking/rtmutex.h ++++ b/kernel/locking/rtmutex.h +@@ -19,15 +19,8 @@ + #define debug_rt_mutex_proxy_unlock(l) do { } while (0) + #define debug_rt_mutex_unlock(l) do { } while (0) + #define debug_rt_mutex_init(m, n, k) do { } while (0) +-#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) +-#define debug_rt_mutex_print_deadlock(w) do { } while (0) + #define debug_rt_mutex_reset_waiter(w) do { } while (0) + +-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) +-{ +- WARN(1, "rtmutex deadlock detected\n"); +-} +- + static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, + enum rtmutex_chainwalk walk) + { +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -29,10 +29,6 @@ struct rt_mutex_waiter { + struct rb_node pi_tree_entry; + struct task_struct *task; + struct rt_mutex *lock; +-#ifdef CONFIG_DEBUG_RT_MUTEXES +- struct pid *deadlock_task_pid; +- struct rt_mutex *deadlock_lock; +-#endif + int prio; + u64 deadline; + }; diff --git a/kernel/patches-5.11.x-rt/0079-0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch b/kernel/patches-5.11.x-rt/0079-0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch new file mode 100644 index 000000000..6cbb41b8b --- /dev/null +++ b/kernel/patches-5.11.x-rt/0079-0003-locking-rtmutex-Move-rt_mutex_init-outside-of-CONFIG.patch @@ -0,0 +1,53 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 29 Sep 2020 16:32:49 +0200 +Subject: [PATCH 03/22] locking/rtmutex: Move rt_mutex_init() outside of + CONFIG_DEBUG_RT_MUTEXES + +rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is +enabled. The static initializer (DEFINE_RT_MUTEX) does not have such a +restriction. + +Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES. +Move the remaining functions in this CONFIG_DEBUG_RT_MUTEXES block to +the upper block. 
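+
+Both initialization forms now set up lockdep, e.g. (placeholder names):
+
+	static DEFINE_RT_MUTEX(foo_lock);	/* static initializer */
+
+	struct rt_mutex bar_lock;
+
+	rt_mutex_init(&bar_lock);	/* creates a static lock_class_key,
+					 * no longer dependent on
+					 * CONFIG_DEBUG_RT_MUTEXES */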
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rtmutex.h | 12 +++--------- + 1 file changed, 3 insertions(+), 9 deletions(-) + +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -43,6 +43,7 @@ struct hrtimer_sleeper; + extern int rt_mutex_debug_check_no_locks_freed(const void *from, + unsigned long len); + extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task); ++ extern void rt_mutex_debug_task_free(struct task_struct *tsk); + #else + static inline int rt_mutex_debug_check_no_locks_freed(const void *from, + unsigned long len) +@@ -50,22 +51,15 @@ struct hrtimer_sleeper; + return 0; + } + # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) ++# define rt_mutex_debug_task_free(t) do { } while (0) + #endif + +-#ifdef CONFIG_DEBUG_RT_MUTEXES +- +-# define rt_mutex_init(mutex) \ ++#define rt_mutex_init(mutex) \ + do { \ + static struct lock_class_key __key; \ + __rt_mutex_init(mutex, __func__, &__key); \ + } while (0) + +- extern void rt_mutex_debug_task_free(struct task_struct *tsk); +-#else +-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL) +-# define rt_mutex_debug_task_free(t) do { } while (0) +-#endif +- + #ifdef CONFIG_DEBUG_LOCK_ALLOC + #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \ + , .dep_map = { .name = #mutexname } diff --git a/kernel/patches-5.11.x-rt/0080-0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch b/kernel/patches-5.11.x-rt/0080-0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch new file mode 100644 index 000000000..a7c2235c4 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0080-0004-locking-rtmutex-Remove-rt_mutex_timed_lock.patch @@ -0,0 +1,89 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 7 Oct 2020 12:11:33 +0200 +Subject: [PATCH 04/22] locking/rtmutex: Remove rt_mutex_timed_lock() + +rt_mutex_timed_lock() has no callers since commit + c051b21f71d1f ("rtmutex: Confine deadlock logic to futex") + +Remove rt_mutex_timed_lock(). 
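+
+For reference, the removed interface was (usage sketch reconstructed
+from the kdoc below, illustration only):
+
+	ret = rt_mutex_timed_lock(&lock, timeout);	/* 0, -EINTR or -ETIMEDOUT */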
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/rtmutex.h | 3 --- + kernel/locking/rtmutex.c | 46 ---------------------------------------------- + 2 files changed, 49 deletions(-) + +--- a/include/linux/rtmutex.h ++++ b/include/linux/rtmutex.h +@@ -99,9 +99,6 @@ extern void rt_mutex_lock(struct rt_mute + #endif + + extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); +-extern int rt_mutex_timed_lock(struct rt_mutex *lock, +- struct hrtimer_sleeper *timeout); +- + extern int rt_mutex_trylock(struct rt_mutex *lock); + + extern void rt_mutex_unlock(struct rt_mutex *lock); +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1406,21 +1406,6 @@ rt_mutex_fastlock(struct rt_mutex *lock, + } + + static inline int +-rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk, +- int (*slowfn)(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- enum rtmutex_chainwalk chwalk)) +-{ +- if (chwalk == RT_MUTEX_MIN_CHAINWALK && +- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) +- return 0; +- +- return slowfn(lock, state, timeout, chwalk); +-} +- +-static inline int + rt_mutex_fasttrylock(struct rt_mutex *lock, + int (*slowfn)(struct rt_mutex *lock)) + { +@@ -1528,37 +1513,6 @@ int __sched __rt_mutex_futex_trylock(str + } + + /** +- * rt_mutex_timed_lock - lock a rt_mutex interruptible +- * the timeout structure is provided +- * by the caller +- * +- * @lock: the rt_mutex to be locked +- * @timeout: timeout structure or NULL (no timeout) +- * +- * Returns: +- * 0 on success +- * -EINTR when interrupted by a signal +- * -ETIMEDOUT when the timeout expired +- */ +-int +-rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) +-{ +- int ret; +- +- might_sleep(); +- +- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); +- ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, +- RT_MUTEX_MIN_CHAINWALK, +- rt_mutex_slowlock); +- if (ret) +- mutex_release(&lock->dep_map, _RET_IP_); +- +- return ret; +-} +-EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); +- +-/** + * rt_mutex_trylock - try to lock a rt_mutex + * + * @lock: the rt_mutex to be locked diff --git a/kernel/patches-5.4.x-rt/0150-rtmutex-futex-prepare-rt.patch b/kernel/patches-5.11.x-rt/0081-0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch similarity index 84% rename from kernel/patches-5.4.x-rt/0150-rtmutex-futex-prepare-rt.patch rename to kernel/patches-5.11.x-rt/0081-0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch index 826e32479..3ffd5fe2e 100644 --- a/kernel/patches-5.4.x-rt/0150-rtmutex-futex-prepare-rt.patch +++ b/kernel/patches-5.11.x-rt/0081-0005-locking-rtmutex-Handle-the-various-new-futex-race-co.patch @@ -1,6 +1,7 @@ -Subject: rtmutex: Handle the various new futex race conditions From: Thomas Gleixner Date: Fri, 10 Jun 2011 11:04:15 +0200 +Subject: [PATCH 05/22] locking/rtmutex: Handle the various new futex race + conditions RT opens a few new interesting race conditions in the rtmutex/futex combo due to futex hash bucket lock being a 'sleeping' spinlock and @@ -8,16 +9,16 @@ therefor not disabling preemption. 
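+
+A sketch of the difference, for illustration only (comment text, not
+taken from the kernel source):
+
+	/*
+	 * !RT: spin_lock(&hb->lock) disables preemption, so a waiter
+	 *      cannot be preempted between taking hb->lock and
+	 *      blocking on the rtmutex.
+	 * RT:  hb->lock is itself a sleeping lock, so a task can be
+	 *      blocked on hb->lock while a requeue or a timeout hits
+	 *      it; these are the windows handled below.
+	 */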
Signed-off-by: Thomas Gleixner --- - kernel/futex.c | 77 ++++++++++++++++++++++++++++++++-------- + kernel/futex.c | 78 ++++++++++++++++++++++++++++++++-------- kernel/locking/rtmutex.c | 36 +++++++++++++++--- kernel/locking/rtmutex_common.h | 2 + - 3 files changed, 94 insertions(+), 21 deletions(-) + 3 files changed, 95 insertions(+), 21 deletions(-) --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -2260,6 +2260,16 @@ static int futex_requeue(u32 __user *uad +@@ -2154,6 +2154,16 @@ static int futex_requeue(u32 __user *uad + */ requeue_pi_wake_futex(this, &key2, hb2); - drop_count++; continue; + } else if (ret == -EAGAIN) { + /* @@ -32,16 +33,16 @@ Signed-off-by: Thomas Gleixner } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -3315,7 +3325,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3172,7 +3182,7 @@ static int futex_wait_requeue_pi(u32 __u + { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; - struct futex_hash_bucket *hb; + struct futex_hash_bucket *hb, *hb2; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -3367,20 +3377,55 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3224,20 +3234,55 @@ static int futex_wait_requeue_pi(u32 __u /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -49,7 +50,7 @@ Signed-off-by: Thomas Gleixner - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); - spin_unlock(&hb->lock); - if (ret) -- goto out_put_keys; +- goto out; + /* + * On RT we must avoid races with requeue and trying to block + * on two mutexes (hb->lock and uaddr2's rtmutex) by @@ -86,7 +87,7 @@ Signed-off-by: Thomas Gleixner + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); + if (ret) -+ goto out_put_keys; ++ goto out; + } /* @@ -108,7 +109,7 @@ Signed-off-by: Thomas Gleixner /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -3389,7 +3434,8 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3246,14 +3291,16 @@ static int futex_wait_requeue_pi(u32 __u * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -116,18 +117,18 @@ Signed-off-by: Thomas Gleixner + spin_lock(&hb2->lock); + BUG_ON(&hb2->lock != q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; -@@ -3400,7 +3446,7 @@ static int futex_wait_requeue_pi(u32 __u + /* + * Drop the reference to the pi state which * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); - spin_unlock(q.lock_ptr); + spin_unlock(&hb2->lock); - } - } else { - struct rt_mutex *pi_mutex; -@@ -3414,7 +3460,8 @@ static int futex_wait_requeue_pi(u32 __u ++ + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. +@@ -3272,7 +3319,8 @@ static int futex_wait_requeue_pi(u32 __u pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); @@ -151,7 +152,7 @@ Signed-off-by: Thomas Gleixner /* * We can speed up the acquire/release, if there's no debugging state to be * set up. 
-@@ -380,7 +385,8 @@ int max_lock_depth = 1024; +@@ -378,7 +383,8 @@ int max_lock_depth = 1024; static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) { @@ -161,7 +162,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -516,7 +522,7 @@ static int rt_mutex_adjust_prio_chain(st +@@ -514,7 +520,7 @@ static int rt_mutex_adjust_prio_chain(st * reached or the state of the chain has changed while we * dropped the locks. */ @@ -170,7 +171,7 @@ Signed-off-by: Thomas Gleixner goto out_unlock_pi; /* -@@ -950,6 +956,22 @@ static int task_blocks_on_rt_mutex(struc +@@ -947,6 +953,22 @@ static int task_blocks_on_rt_mutex(struc return -EDEADLK; raw_spin_lock(&task->pi_lock); @@ -193,7 +194,7 @@ Signed-off-by: Thomas Gleixner waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; -@@ -973,7 +995,7 @@ static int task_blocks_on_rt_mutex(struc +@@ -970,7 +992,7 @@ static int task_blocks_on_rt_mutex(struc rt_mutex_enqueue_pi(owner, waiter); rt_mutex_adjust_prio(owner); @@ -202,7 +203,7 @@ Signed-off-by: Thomas Gleixner chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; -@@ -1069,7 +1091,7 @@ static void remove_waiter(struct rt_mute +@@ -1066,7 +1088,7 @@ static void remove_waiter(struct rt_mute { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); @@ -211,7 +212,7 @@ Signed-off-by: Thomas Gleixner lockdep_assert_held(&lock->wait_lock); -@@ -1095,7 +1117,8 @@ static void remove_waiter(struct rt_mute +@@ -1092,7 +1114,8 @@ static void remove_waiter(struct rt_mute rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ @@ -221,7 +222,7 @@ Signed-off-by: Thomas Gleixner raw_spin_unlock(&owner->pi_lock); -@@ -1131,7 +1154,8 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1128,7 +1151,8 @@ void rt_mutex_adjust_pi(struct task_stru raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; @@ -233,7 +234,7 @@ Signed-off-by: Thomas Gleixner } --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -130,6 +130,8 @@ enum rtmutex_chainwalk { +@@ -125,6 +125,8 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ diff --git a/kernel/patches-5.4.x-rt/0151-futex-requeue-pi-fix.patch b/kernel/patches-5.11.x-rt/0082-0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch similarity index 94% rename from kernel/patches-5.4.x-rt/0151-futex-requeue-pi-fix.patch rename to kernel/patches-5.11.x-rt/0082-0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch index 2fd439623..b0ee73c83 100644 --- a/kernel/patches-5.4.x-rt/0151-futex-requeue-pi-fix.patch +++ b/kernel/patches-5.11.x-rt/0082-0006-futex-Fix-bug-on-when-a-requeued-RT-task-times-out.patch @@ -1,12 +1,11 @@ From: Steven Rostedt Date: Tue, 14 Jul 2015 14:26:34 +0200 -Subject: futex: Fix bug on when a requeued RT task times out +Subject: [PATCH 06/22] futex: Fix bug on when a requeued RT task times out Requeue with timeout causes a bug with PREEMPT_RT. The bug comes from a timed out condition. - TASK 1 TASK 2 ------ ------ futex_wait_requeue_pi() @@ -16,13 +15,12 @@ The bug comes from a timed out condition. double_lock_hb(); raw_spin_lock(pi_lock); - if (current->pi_blocked_on) { + if (current->pi_blocked_on) { } else { current->pi_blocked_on = PI_WAKE_INPROGRESS; run_spin_unlock(pi_lock); spin_lock(hb->lock); <-- blocked! 
- plist_for_each_entry_safe(this) { rt_mutex_start_proxy_lock(); task_blocks_on_rt_mutex(); @@ -45,7 +43,6 @@ Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies the proxy task that it is being requeued, and will handle things appropriately. - Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- @@ -65,7 +62,7 @@ Signed-off-by: Thomas Gleixner } /* -@@ -1779,6 +1780,34 @@ int __rt_mutex_start_proxy_lock(struct r +@@ -1720,6 +1721,34 @@ int __rt_mutex_start_proxy_lock(struct r if (try_to_take_rt_mutex(lock, task, NULL)) return 1; @@ -102,7 +99,7 @@ Signed-off-by: Thomas Gleixner RT_MUTEX_FULL_CHAINWALK); --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -131,6 +131,7 @@ enum rtmutex_chainwalk { +@@ -126,6 +126,7 @@ enum rtmutex_chainwalk { * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) diff --git a/kernel/patches-5.4.x-rt/0156-rtmutex-Make-lock_killable-work.patch b/kernel/patches-5.11.x-rt/0083-0007-locking-rtmutex-Make-lock_killable-work.patch similarity index 86% rename from kernel/patches-5.4.x-rt/0156-rtmutex-Make-lock_killable-work.patch rename to kernel/patches-5.11.x-rt/0083-0007-locking-rtmutex-Make-lock_killable-work.patch index 1c793c5c2..f1e672e9f 100644 --- a/kernel/patches-5.4.x-rt/0156-rtmutex-Make-lock_killable-work.patch +++ b/kernel/patches-5.11.x-rt/0083-0007-locking-rtmutex-Make-lock_killable-work.patch @@ -1,13 +1,12 @@ From: Thomas Gleixner Date: Sat, 1 Apr 2017 12:50:59 +0200 -Subject: [PATCH] rtmutex: Make lock_killable work +Subject: [PATCH 07/22] locking/rtmutex: Make lock_killable work Locking an rt mutex killable does not work because signal handling is restricted to TASK_INTERRUPTIBLE. -Use signal_pending_state() unconditionaly. +Use signal_pending_state() unconditionally. -Cc: stable-rt@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- @@ -16,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1200,18 +1200,13 @@ static int __sched +@@ -1197,18 +1197,13 @@ static int __sched if (try_to_take_rt_mutex(lock, current, waiter)) break; diff --git a/kernel/patches-5.4.x-rt/0157-spinlock-types-separate-raw.patch b/kernel/patches-5.11.x-rt/0084-0008-locking-spinlock-Split-the-lock-types-header.patch similarity index 70% rename from kernel/patches-5.4.x-rt/0157-spinlock-types-separate-raw.patch rename to kernel/patches-5.11.x-rt/0084-0008-locking-spinlock-Split-the-lock-types-header.patch index e2291eac0..d6b9e9d20 100644 --- a/kernel/patches-5.4.x-rt/0157-spinlock-types-separate-raw.patch +++ b/kernel/patches-5.11.x-rt/0084-0008-locking-spinlock-Split-the-lock-types-header.patch @@ -1,6 +1,6 @@ -Subject: spinlock: Split the lock types header From: Thomas Gleixner Date: Wed, 29 Jun 2011 19:34:01 +0200 +Subject: [PATCH 08/22] locking/spinlock: Split the lock types header Split raw_spinlock into its own file and the remaining spinlock_t into its own non-RT header. The non-RT header will be replaced later by sleeping @@ -8,11 +8,13 @@ spinlocks. 
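+
+The intended layering after the split, sketched for illustration:
+
+	spinlock_types.h
+	  -> spinlock_types_raw.h	/* raw_spinlock_t, shared by RT and !RT */
+	  -> spinlock_types_nort.h	/* spinlock_t on top of raw_spinlock_t; */
+					/* to be replaced by a sleeping-lock   */
+					/* variant on RT                      */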
Signed-off-by: Thomas Gleixner --- - include/linux/rwlock_types.h | 4 ++ - include/linux/spinlock_types.h | 71 +----------------------------------- - include/linux/spinlock_types_nort.h | 33 ++++++++++++++++ - include/linux/spinlock_types_raw.h | 55 +++++++++++++++++++++++++++ - 4 files changed, 94 insertions(+), 69 deletions(-) + include/linux/rwlock_types.h | 4 + + include/linux/spinlock_types.h | 87 ------------------------------------ + include/linux/spinlock_types_nort.h | 39 ++++++++++++++++ + include/linux/spinlock_types_raw.h | 65 ++++++++++++++++++++++++++ + 4 files changed, 110 insertions(+), 85 deletions(-) + create mode 100644 include/linux/spinlock_types_nort.h + create mode 100644 include/linux/spinlock_types_raw.h --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -29,7 +31,7 @@ Signed-off-by: Thomas Gleixner * and initializers --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h -@@ -9,76 +9,9 @@ +@@ -9,92 +9,9 @@ * Released under the General Public License (GPL). */ @@ -40,7 +42,7 @@ Signed-off-by: Thomas Gleixner -#endif +#include --#include +-#include - -typedef struct raw_spinlock { - arch_spinlock_t raw_lock; @@ -58,8 +60,18 @@ Signed-off-by: Thomas Gleixner -#define SPINLOCK_OWNER_INIT ((void *)-1L) - -#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } +-# define RAW_SPIN_DEP_MAP_INIT(lockname) \ +- .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_SPIN, \ +- } +-# define SPIN_DEP_MAP_INIT(lockname) \ +- .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_CONFIG, \ +- } -#else +-# define RAW_SPIN_DEP_MAP_INIT(lockname) -# define SPIN_DEP_MAP_INIT(lockname) -#endif - @@ -76,7 +88,7 @@ Signed-off-by: Thomas Gleixner - { \ - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ - SPIN_DEBUG_INIT(lockname) \ -- SPIN_DEP_MAP_INIT(lockname) } +- RAW_SPIN_DEP_MAP_INIT(lockname) } - -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) @@ -97,11 +109,17 @@ Signed-off-by: Thomas Gleixner - }; -} spinlock_t; - +-#define ___SPIN_LOCK_INITIALIZER(lockname) \ +- { \ +- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ +- SPIN_DEBUG_INIT(lockname) \ +- SPIN_DEP_MAP_INIT(lockname) } +- -#define __SPIN_LOCK_INITIALIZER(lockname) \ -- { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } +- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } - -#define __SPIN_LOCK_UNLOCKED(lockname) \ -- (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) +- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) - -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) +#include @@ -110,7 +128,7 @@ Signed-off-by: Thomas Gleixner --- /dev/null +++ b/include/linux/spinlock_types_nort.h -@@ -0,0 +1,33 @@ +@@ -0,0 +1,39 @@ +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H +#define __LINUX_SPINLOCK_TYPES_NORT_H + @@ -135,18 +153,24 @@ Signed-off-by: Thomas Gleixner + }; +} spinlock_t; + ++#define ___SPIN_LOCK_INITIALIZER(lockname) \ ++{ \ ++ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ ++ SPIN_DEBUG_INIT(lockname) \ ++ SPIN_DEP_MAP_INIT(lockname) } ++ +#define __SPIN_LOCK_INITIALIZER(lockname) \ -+ { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } ++ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } } + +#define __SPIN_LOCK_UNLOCKED(lockname) \ -+ (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) ++ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + +#endif --- /dev/null +++ 
b/include/linux/spinlock_types_raw.h -@@ -0,0 +1,55 @@ +@@ -0,0 +1,65 @@ +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +#define __LINUX_SPINLOCK_TYPES_RAW_H + @@ -158,7 +182,7 @@ Signed-off-by: Thomas Gleixner +# include +#endif + -+#include ++#include + +typedef struct raw_spinlock { + arch_spinlock_t raw_lock; @@ -176,8 +200,18 @@ Signed-off-by: Thomas Gleixner +#define SPINLOCK_OWNER_INIT ((void *)-1L) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } ++# define RAW_SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SPIN, \ ++ } ++# define SPIN_DEP_MAP_INIT(lockname) \ ++ .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_CONFIG, \ ++ } +#else ++# define RAW_SPIN_DEP_MAP_INIT(lockname) +# define SPIN_DEP_MAP_INIT(lockname) +#endif + @@ -191,14 +225,14 @@ Signed-off-by: Thomas Gleixner +#endif + +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ -+ { \ ++{ \ + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ + SPIN_DEBUG_INIT(lockname) \ -+ SPIN_DEP_MAP_INIT(lockname) } ++ RAW_SPIN_DEP_MAP_INIT(lockname) } + +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) + -+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) ++#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) + +#endif diff --git a/kernel/patches-5.4.x-rt/0158-rtmutex-avoid-include-hell.patch b/kernel/patches-5.11.x-rt/0085-0009-locking-rtmutex-Avoid-include-hell.patch similarity index 91% rename from kernel/patches-5.4.x-rt/0158-rtmutex-avoid-include-hell.patch rename to kernel/patches-5.11.x-rt/0085-0009-locking-rtmutex-Avoid-include-hell.patch index a3b55f5b7..4eb12e889 100644 --- a/kernel/patches-5.4.x-rt/0158-rtmutex-avoid-include-hell.patch +++ b/kernel/patches-5.11.x-rt/0085-0009-locking-rtmutex-Avoid-include-hell.patch @@ -1,6 +1,6 @@ -Subject: rtmutex: Avoid include hell From: Thomas Gleixner Date: Wed, 29 Jun 2011 20:06:39 +0200 +Subject: [PATCH 09/22] locking/rtmutex: Avoid include hell Include only the required raw types. This avoids pulling in the complete spinlock header which in turn requires rtmutex.h at some point. diff --git a/kernel/patches-5.11.x-rt/0086-0010-lockdep-Reduce-header-files-in-debug_locks.h.patch b/kernel/patches-5.11.x-rt/0086-0010-lockdep-Reduce-header-files-in-debug_locks.h.patch new file mode 100644 index 000000000..fe8bb603c --- /dev/null +++ b/kernel/patches-5.11.x-rt/0086-0010-lockdep-Reduce-header-files-in-debug_locks.h.patch @@ -0,0 +1,26 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 14 Aug 2020 16:55:25 +0200 +Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h + +The inclusion of printk.h leads to circular dependency if spinlock_t is +based on rt_mutex. + +Include only atomic.h (xchg()) and cache.h (__read_mostly). 
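+
+For illustration, the two dependencies which remain are small:
+
+	xchg()		/* flips debug_locks, from <linux/atomic.h> */
+	__read_mostly	/* annotates the globals, from <linux/cache.h> */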
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/debug_locks.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/include/linux/debug_locks.h
++++ b/include/linux/debug_locks.h
+@@ -3,8 +3,7 @@
+ #define __LINUX_DEBUG_LOCKING_H
+ 
+ #include <linux/atomic.h>
+-#include <linux/bug.h>
+-#include <linux/printk.h>
++#include <linux/cache.h>
+ 
+ struct task_struct;
+ 
diff --git a/kernel/patches-5.11.x-rt/0087-0011-locking-split-out-the-rbtree-definition.patch b/kernel/patches-5.11.x-rt/0087-0011-locking-split-out-the-rbtree-definition.patch
new file mode 100644
index 000000000..cb0ab1fb1
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0087-0011-locking-split-out-the-rbtree-definition.patch
@@ -0,0 +1,108 @@
+From: Sebastian Andrzej Siewior
+Date: Fri, 14 Aug 2020 17:08:41 +0200
+Subject: [PATCH 11/22] locking: split out the rbtree definition
+
+rtmutex.h needs the definition for rb_root_cached. By including kernel.h
+we will get to spinlock.h which requires rtmutex.h again.
+
+Split out the required struct definition and move it into its own header
+file which can be included by rtmutex.h
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/rbtree.h | 27 +--------------------------
+ include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++
+ include/linux/rtmutex.h | 2 +-
+ 3 files changed, 33 insertions(+), 27 deletions(-)
+ create mode 100644 include/linux/rbtree_type.h
+
+--- a/include/linux/rbtree.h
++++ b/include/linux/rbtree.h
+@@ -19,19 +19,9 @@
+ 
+ #include <linux/kernel.h>
+ #include <linux/stddef.h>
++#include <linux/rbtree_type.h>
+ #include <linux/rcupdate.h>
+ 
+-struct rb_node {
+-	unsigned long __rb_parent_color;
+-	struct rb_node *rb_right;
+-	struct rb_node *rb_left;
+-} __attribute__((aligned(sizeof(long))));
+- /* The alignment might seem pointless, but allegedly CRIS needs it */
+-
+-struct rb_root {
+-	struct rb_node *rb_node;
+-};
+-
+ #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+ 
+ #define RB_ROOT (struct rb_root) { NULL, }
+@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(stru
+ 				typeof(*pos), field); 1; }); \
+ 	 pos = n)
+ 
+-/*
+- * Leftmost-cached rbtrees.
+- *
+- * We do not cache the rightmost node based on footprint
+- * size vs number of potential users that could benefit
+- * from O(1) rb_last(). Just not worth it, users that want
+- * this feature can always implement the logic explicitly.
+- * Furthermore, users that want to cache both pointers may
+- * find it a bit asymmetric, but that's ok.
+- */
+-struct rb_root_cached {
+-	struct rb_root rb_root;
+-	struct rb_node *rb_leftmost;
+-};
+-
+ #define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
+ 
+ /* Same as rb_first(), but O(1) */
+--- /dev/null
++++ b/include/linux/rbtree_type.h
+@@ -0,0 +1,31 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++#ifndef _LINUX_RBTREE_TYPE_H
++#define _LINUX_RBTREE_TYPE_H
++
++struct rb_node {
++	unsigned long __rb_parent_color;
++	struct rb_node *rb_right;
++	struct rb_node *rb_left;
++} __attribute__((aligned(sizeof(long))));
++/* The alignment might seem pointless, but allegedly CRIS needs it */
++
++struct rb_root {
++	struct rb_node *rb_node;
++};
++
++/*
++ * Leftmost-cached rbtrees.
++ *
++ * We do not cache the rightmost node based on footprint
++ * size vs number of potential users that could benefit
++ * from O(1) rb_last(). Just not worth it, users that want
++ * this feature can always implement the logic explicitly.
++ * Furthermore, users that want to cache both pointers may
++ * find it a bit asymmetric, but that's ok.
++ */
++struct rb_root_cached {
++	struct rb_root rb_root;
++	struct rb_node *rb_leftmost;
++};
++
++#endif
+--- a/include/linux/rtmutex.h
++++ b/include/linux/rtmutex.h
+@@ -14,7 +14,7 @@
+ #define __LINUX_RT_MUTEX_H
+ 
+ #include <linux/linkage.h>
+-#include <linux/rbtree.h>
++#include <linux/rbtree_type.h>
+ #include <linux/spinlock_types_raw.h>
+ 
+ extern int max_lock_depth; /* for sysctl */
diff --git a/kernel/patches-5.4.x-rt/0160-rtmutex-Provide-rt_mutex_slowlock_locked.patch b/kernel/patches-5.11.x-rt/0088-0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
similarity index 92%
rename from kernel/patches-5.4.x-rt/0160-rtmutex-Provide-rt_mutex_slowlock_locked.patch
rename to kernel/patches-5.11.x-rt/0088-0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
index d6b902cba..e333c4ef5 100644
--- a/kernel/patches-5.4.x-rt/0160-rtmutex-Provide-rt_mutex_slowlock_locked.patch
+++ b/kernel/patches-5.11.x-rt/0088-0012-locking-rtmutex-Provide-rt_mutex_slowlock_locked.patch
@@ -1,6 +1,6 @@
 From: Thomas Gleixner
 Date: Thu, 12 Oct 2017 16:14:22 +0200
-Subject: rtmutex: Provide rt_mutex_slowlock_locked()
+Subject: [PATCH 12/22] locking/rtmutex: Provide rt_mutex_slowlock_locked()
 
 This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt.
 
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 
 --- a/kernel/locking/rtmutex.c
 +++ b/kernel/locking/rtmutex.c
-@@ -1243,35 +1243,16 @@ static void rt_mutex_handle_deadlock(int
+@@ -1234,35 +1234,16 @@ static void rt_mutex_handle_deadlock(int
 	}
 }
 
@@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 
 	set_current_state(state);
 
-@@ -1279,16 +1260,16 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+@@ -1270,16 +1251,16 @@ rt_mutex_slowlock(struct rt_mutex *lock,
 	if (unlikely(timeout))
 		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
 
@@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 	}
 
 	/*
-@@ -1296,6 +1277,34 @@ rt_mutex_slowlock(struct rt_mutex *lock,
+@@ -1287,6 +1268,34 @@ rt_mutex_slowlock(struct rt_mutex *lock,
 	 * unconditionally. We might have to fix that up.
 	 */
 	fixup_rt_mutex_waiters(lock);
@@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 
 /*
  * This is the control structure for tasks blocked on a rt_mutex,
-@@ -159,6 +160,12 @@ extern bool __rt_mutex_futex_unlock(stru
+@@ -153,6 +154,12 @@ extern bool __rt_mutex_futex_unlock(stru
 			    struct wake_q_head *wqh);
 
 extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
diff --git a/kernel/patches-5.4.x-rt/0161-rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch b/kernel/patches-5.11.x-rt/0089-0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
similarity index 70%
rename from kernel/patches-5.4.x-rt/0161-rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch
rename to kernel/patches-5.11.x-rt/0089-0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
index a2953bbe2..75ba6a559 100644
--- a/kernel/patches-5.4.x-rt/0161-rtmutex-export-lockdep-less-version-of-rt_mutex-s-lo.patch
+++ b/kernel/patches-5.11.x-rt/0089-0013-locking-rtmutex-export-lockdep-less-version-of-rt_mu.patch
@@ -1,20 +1,20 @@
 From: Thomas Gleixner
 Date: Thu, 12 Oct 2017 16:36:39 +0200
-Subject: rtmutex: export lockdep-less version of rt_mutex's lock,
- trylock and unlock
+Subject: [PATCH 13/22] locking/rtmutex: export lockdep-less version of
+ rt_mutex's lock, trylock and unlock
 
 Required for lock implementation ontop of rtmutex.
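+
+Illustrative caller (hypothetical my_spinlock wrapper, not part of this
+patch): a sleeping-lock implementation does its own lockdep annotation
+and then uses the lockdep-less entry points:
+
+	static void my_spin_lock(struct my_spinlock *l)
+	{
+		spin_acquire(&l->dep_map, 0, 0, _RET_IP_);
+		__rt_mutex_lock_state(&l->rtmutex, TASK_UNINTERRUPTIBLE);
+	}
+
+	static void my_spin_unlock(struct my_spinlock *l)
+	{
+		spin_release(&l->dep_map, _RET_IP_);
+		__rt_mutex_unlock(&l->rtmutex);
+	}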
Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 59 ++++++++++++++++++++++++++-------------- + kernel/locking/rtmutex.c | 54 ++++++++++++++++++++++++++++------------ kernel/locking/rtmutex_common.h | 3 ++ - 2 files changed, 42 insertions(+), 20 deletions(-) + 2 files changed, 41 insertions(+), 16 deletions(-) --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1493,12 +1493,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc +@@ -1469,12 +1469,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc rt_mutex_postunlock(&wake_q); } @@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); + ret = __rt_mutex_lock_state(lock, state); + if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ mutex_release(&lock->dep_map, _RET_IP_); + return ret; +} + @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior } #ifdef CONFIG_DEBUG_LOCK_ALLOC -@@ -1539,16 +1560,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); +@@ -1515,16 +1536,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { @@ -61,31 +61,16 @@ Signed-off-by: Sebastian Andrzej Siewior - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); - if (ret) -- mutex_release(&lock->dep_map, 1, _RET_IP_); +- mutex_release(&lock->dep_map, _RET_IP_); - - return ret; + return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -@@ -1574,13 +1586,10 @@ int __sched __rt_mutex_futex_trylock(str - * Returns: - * 0 on success - * -EINTR when interrupted by a signal -- * -EDEADLK when the lock would deadlock (when deadlock detection is on) - */ - int __sched rt_mutex_lock_killable(struct rt_mutex *lock) - { -- might_sleep(); -- -- return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock); -+ return rt_mutex_lock_state(lock, 0, TASK_KILLABLE); +@@ -1541,6 +1553,14 @@ int __sched __rt_mutex_futex_trylock(str + return __rt_mutex_slowtrylock(lock); } - EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); - -@@ -1615,6 +1624,14 @@ rt_mutex_timed_lock(struct rt_mutex *loc - } - EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); +int __sched __rt_mutex_trylock(struct rt_mutex *lock) +{ @@ -98,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior /** * rt_mutex_trylock - try to lock a rt_mutex * -@@ -1630,10 +1647,7 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1556,10 +1576,7 @@ int __sched rt_mutex_trylock(struct rt_m { int ret; @@ -110,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (ret) mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); -@@ -1641,6 +1655,11 @@ int __sched rt_mutex_trylock(struct rt_m +@@ -1567,6 +1584,11 @@ int __sched rt_mutex_trylock(struct rt_m } EXPORT_SYMBOL_GPL(rt_mutex_trylock); @@ -124,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior * --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -162,6 +162,9 @@ extern bool __rt_mutex_futex_unlock(stru +@@ -156,6 +156,9 @@ extern bool __rt_mutex_futex_unlock(stru extern void rt_mutex_postunlock(struct wake_q_head *wake_q); /* RW semaphore special interface */ diff --git a/kernel/patches-5.4.x-rt/0143-sched-rt-mutex-wakeup.patch b/kernel/patches-5.11.x-rt/0090-0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch similarity index 61% rename from kernel/patches-5.4.x-rt/0143-sched-rt-mutex-wakeup.patch rename to 
kernel/patches-5.11.x-rt/0090-0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch index 8a0154428..02796d443 100644 --- a/kernel/patches-5.4.x-rt/0143-sched-rt-mutex-wakeup.patch +++ b/kernel/patches-5.11.x-rt/0090-0014-sched-Add-saved_state-for-tasks-blocked-on-sleeping-.patch @@ -1,6 +1,7 @@ -Subject: sched: Add saved_state for tasks blocked on sleeping locks From: Thomas Gleixner Date: Sat, 25 Jun 2011 09:21:04 +0200 +Subject: [PATCH 14/22] sched: Add saved_state for tasks blocked on sleeping + locks Spinlocks are state preserving in !RT. RT changes the state when a task gets blocked on a lock. So we need to remember the state before @@ -11,13 +12,13 @@ sleep is done, the saved state is restored. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 +++ - kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++--- + kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++-- kernel/sched/sched.h | 1 + - 3 files changed, 43 insertions(+), 3 deletions(-) + 3 files changed, 36 insertions(+), 2 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -631,6 +631,8 @@ struct task_struct { +@@ -655,6 +655,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; @@ -26,7 +27,7 @@ Signed-off-by: Thomas Gleixner /* * This begins the randomizable portion of task_struct. Only -@@ -1679,6 +1681,7 @@ extern struct task_struct *find_get_task +@@ -1777,6 +1779,7 @@ extern struct task_struct *find_get_task extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -36,30 +37,20 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -2524,6 +2524,8 @@ try_to_wake_up(struct task_struct *p, un +@@ -3316,7 +3316,7 @@ try_to_wake_up(struct task_struct *p, un int cpu, success = 0; preempt_disable(); -+ -+#ifndef CONFIG_PREEMPT_RT - if (p == current) { +- if (p == current) { ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) { /* * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -@@ -2546,7 +2548,7 @@ try_to_wake_up(struct task_struct *p, un - trace_sched_wakeup(p); - goto out; - } -- -+#endif - /* - * If we are going to wake up a thread waiting for CONDITION we - * need to ensure that CONDITION=1 done by the caller can not be -@@ -2555,8 +2557,27 @@ try_to_wake_up(struct task_struct *p, un + * == smp_processor_id()'. 
Together this means we can special +@@ -3346,8 +3346,26 @@ try_to_wake_up(struct task_struct *p, un */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!(p->state & state)) -- goto unlock; + if (!(p->state & state)) { + /* + * The task might be running due to a spinlock sleeper @@ -72,8 +63,7 @@ Signed-off-by: Thomas Gleixner + success = 1; + } + } -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ goto out_nostat; + goto unlock; + } + /* + * If this is a regular wakeup, then we can unconditionally @@ -84,20 +74,7 @@ Signed-off-by: Thomas Gleixner trace_sched_waking(p); -@@ -2648,9 +2669,12 @@ try_to_wake_up(struct task_struct *p, un - ttwu_queue(p, cpu, wake_flags); - unlock: - raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+#ifndef CONFIG_PREEMPT_RT - out: -+#endif - if (success) - ttwu_stat(p, cpu, wake_flags); -+out_nostat: - preempt_enable(); - - return success; -@@ -2673,6 +2697,18 @@ int wake_up_process(struct task_struct * +@@ -3536,6 +3554,18 @@ int wake_up_process(struct task_struct * } EXPORT_SYMBOL(wake_up_process); @@ -118,11 +95,11 @@ Signed-off-by: Thomas Gleixner return try_to_wake_up(p, state, 0); --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1644,6 +1644,7 @@ static inline int task_on_rq_migrating(s - #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ - #define WF_FORK 0x02 /* Child wakeup after fork */ - #define WF_MIGRATED 0x4 /* Internal use, task got migrated */ -+#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ +@@ -1751,6 +1751,7 @@ static inline int task_on_rq_migrating(s + #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ + #define WF_MIGRATED 0x20 /* Internal use, task got migrated */ + #define WF_ON_CPU 0x40 /* Wakee is on_cpu */ ++#define WF_LOCK_SLEEPER 0x80 /* Wakeup spinlock "sleeper" */ - /* - * To aid in avoiding the subversion of "niceness" due to uneven distribution + #ifdef CONFIG_SMP + static_assert(WF_EXEC == SD_BALANCE_EXEC); diff --git a/kernel/patches-5.4.x-rt/0162-rtmutex-add-sleeping-lock-implementation.patch b/kernel/patches-5.11.x-rt/0091-0015-locking-rtmutex-add-sleeping-lock-implementation.patch similarity index 82% rename from kernel/patches-5.4.x-rt/0162-rtmutex-add-sleeping-lock-implementation.patch rename to kernel/patches-5.11.x-rt/0091-0015-locking-rtmutex-add-sleeping-lock-implementation.patch index ece330074..70a1c945e 100644 --- a/kernel/patches-5.4.x-rt/0162-rtmutex-add-sleeping-lock-implementation.patch +++ b/kernel/patches-5.11.x-rt/0091-0015-locking-rtmutex-add-sleeping-lock-implementation.patch @@ -1,28 +1,29 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:11:19 +0200 -Subject: rtmutex: add sleeping lock implementation +Subject: [PATCH 15/22] locking/rtmutex: add sleeping lock implementation Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- include/linux/kernel.h | 5 - include/linux/rtmutex.h | 21 + - include/linux/sched.h | 8 + include/linux/preempt.h | 4 + include/linux/rtmutex.h | 19 + + include/linux/sched.h | 7 include/linux/sched/wake_q.h | 13 + - include/linux/spinlock_rt.h | 156 +++++++++++++ - include/linux/spinlock_types_rt.h | 48 ++++ + include/linux/spinlock_rt.h | 155 +++++++++++++ + include/linux/spinlock_types_rt.h | 38 +++ kernel/fork.c | 1 - kernel/futex.c | 11 - kernel/locking/rtmutex.c | 436 ++++++++++++++++++++++++++++++++++---- + kernel/futex.c | 10 + kernel/locking/rtmutex.c | 444 ++++++++++++++++++++++++++++++++++---- kernel/locking/rtmutex_common.h | 14 - kernel/sched/core.c | 39 ++- - 11 files changed, 
694 insertions(+), 58 deletions(-) + 12 files changed, 694 insertions(+), 55 deletions(-) create mode 100644 include/linux/spinlock_rt.h create mode 100644 include/linux/spinlock_types_rt.h --- a/include/linux/kernel.h +++ b/include/linux/kernel.h -@@ -227,6 +227,10 @@ extern void __cant_sleep(const char *fil +@@ -107,6 +107,10 @@ extern void __cant_migrate(const char *f */ # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) @@ -33,23 +34,31 @@ Signed-off-by: Sebastian Andrzej Siewior /** * cant_sleep - annotation for functions that cannot sleep * -@@ -258,6 +262,7 @@ extern void __cant_sleep(const char *fil +@@ -150,6 +154,7 @@ extern void __cant_migrate(const char *f static inline void __might_sleep(const char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) +# define might_sleep_no_state_check() do { might_resched(); } while (0) # define cant_sleep() do { } while (0) + # define cant_migrate() do { } while (0) # define sched_annotate_sleep() do { } while (0) - # define non_block_start() do { } while (0) +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -121,7 +121,11 @@ + /* + * The preempt_count offset after spin_lock() + */ ++#if !defined(CONFIG_PREEMPT_RT) + #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET ++#else ++#define PREEMPT_LOCK_OFFSET 0 ++#endif + + /* + * The preempt_count offset needed for things like: --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h -@@ -14,11 +14,15 @@ - #define __LINUX_RT_MUTEX_H - - #include --#include - #include -+#include +@@ -19,6 +19,10 @@ extern int max_lock_depth; /* for sysctl */ @@ -60,46 +69,40 @@ Signed-off-by: Sebastian Andrzej Siewior /** * The rt_mutex structure * -@@ -31,8 +35,8 @@ struct rt_mutex { +@@ -31,6 +35,7 @@ struct rt_mutex { raw_spinlock_t wait_lock; struct rb_root_cached waiters; struct task_struct *owner; --#ifdef CONFIG_DEBUG_RT_MUTEXES - int save_state; -+#ifdef CONFIG_DEBUG_RT_MUTEXES - const char *name, *file; - int line; - void *magic; -@@ -82,16 +86,23 @@ do { \ ++ int save_state; + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +@@ -67,11 +72,19 @@ do { \ #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) #endif -#define __RT_MUTEX_INITIALIZER(mutexname) \ - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ ++ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ , .waiters = RB_ROOT_CACHED \ , .owner = NULL \ - __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ - __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} + __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) + +#define __RT_MUTEX_INITIALIZER(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 0 } ++ ++#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ ++ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ ++ , .save_state = 1 } #define DEFINE_RT_MUTEX(mutexname) \ struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) - -+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ -+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ -+ , .save_state = 1 } -+ - /** - * rt_mutex_is_locked - is the mutex locked - * @lock: the mutex to be queried --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -140,6 +140,9 @@ struct task_group; +@@ -141,6 +141,9 @@ struct io_uring_task; smp_store_mb(current->state, (state_value)); \ } while (0) @@ -109,15 
+112,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ -@@ -149,6 +152,7 @@ struct task_group; - current->state = (state_value); \ - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ - } while (0) -+ - #else - /* - * set_current_state() includes a barrier so that the write of current->state -@@ -193,6 +197,9 @@ struct task_group; +@@ -194,6 +197,9 @@ struct io_uring_task; #define set_current_state(state_value) \ smp_store_mb(current->state, (state_value)) @@ -127,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * set_special_state() should be used for those states when the blocking task * can not use the regular condition based wait-loop. In that case we must -@@ -950,6 +957,7 @@ struct task_struct { +@@ -1015,6 +1021,7 @@ struct task_struct { raw_spinlock_t pi_lock; struct wake_q_node wake_q; @@ -158,7 +153,8 @@ Signed-off-by: Sebastian Andrzej Siewior #endif /* _LINUX_SCHED_WAKE_Q_H */ --- /dev/null +++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,156 @@ +@@ -0,0 +1,155 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_SPINLOCK_RT_H +#define __LINUX_SPINLOCK_RT_H + @@ -180,10 +176,10 @@ Signed-off-by: Sebastian Andrzej Siewior +} while (0) + +extern void __lockfunc rt_spin_lock(spinlock_t *lock); -+extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock); +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); -+extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); @@ -229,6 +225,12 @@ Signed-off-by: Sebastian Andrzej Siewior + rt_spin_lock_nested(lock, subclass); \ + } while (0) + ++# define spin_lock_nest_lock(lock, subclass) \ ++ do { \ ++ typecheck(struct lockdep_map *, &(subclass)->dep_map); \ ++ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \ ++ } while (0) ++ +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ @@ -236,14 +238,15 @@ Signed-off-by: Sebastian Andrzej Siewior + rt_spin_lock_nested(lock, subclass); \ + } while (0) +#else -+# define spin_lock_nested(lock, subclass) spin_lock(lock) -+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) ++# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock))) ++# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock))) + +# define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + typecheck(unsigned long, flags); \ + flags = 0; \ -+ spin_lock(lock); \ ++ spin_lock(((void)(subclass), (lock))); \ + } while (0) +#endif + @@ -254,20 +257,6 @@ Signed-off-by: Sebastian Andrzej Siewior + spin_lock(lock); \ + } while (0) + -+static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) -+{ -+ unsigned long flags = 0; -+#ifdef CONFIG_TRACE_IRQFLAGS -+ flags = rt_spin_lock_trace_flags(lock); -+#else -+ spin_lock(lock); /* lock_local */ -+#endif -+ return flags; -+} -+ -+/* FIXME: we need rt_spin_lock_nest_lock */ -+#define spin_lock_nest_lock(lock, nest_lock) 
spin_lock_nested(lock, 0) -+ +#define spin_unlock(lock) rt_spin_unlock(lock) + +#define spin_unlock_bh(lock) \ @@ -288,10 +277,15 @@ Signed-off-by: Sebastian Andrzej Siewior +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) +#define spin_trylock_irq(lock) spin_trylock(lock) + -+#define spin_trylock_irqsave(lock, flags) \ -+ rt_spin_trylock_irqsave(lock, &(flags)) -+ -+#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) ++#define spin_trylock_irqsave(lock, flags) \ ++({ \ ++ int __locked; \ ++ \ ++ typecheck(unsigned long, flags); \ ++ flags = 0; \ ++ __locked = spin_trylock(lock); \ ++ __locked; \ ++}) + +#ifdef CONFIG_GENERIC_LOCKBREAK +# define spin_is_contended(lock) ((lock)->break_lock) @@ -317,7 +311,8 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- /dev/null +++ b/include/linux/spinlock_types_rt.h -@@ -0,0 +1,48 @@ +@@ -0,0 +1,38 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_SPINLOCK_TYPES_RT_H +#define __LINUX_SPINLOCK_TYPES_RT_H + @@ -338,22 +333,11 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif +} spinlock_t; + -+#ifdef CONFIG_DEBUG_RT_MUTEXES -+# define __RT_SPIN_INITIALIZER(name) \ ++#define __RT_SPIN_INITIALIZER(name) \ + { \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ + .save_state = 1, \ -+ .file = __FILE__, \ -+ .line = __LINE__ , \ + } -+#else -+# define __RT_SPIN_INITIALIZER(name) \ -+ { \ -+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ -+ .save_state = 1, \ -+ } -+#endif -+ +/* +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) +*/ @@ -368,7 +352,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -950,6 +950,7 @@ static struct task_struct *dup_task_stru +@@ -927,6 +927,7 @@ static struct task_struct *dup_task_stru tsk->splice_pipe = NULL; tsk->task_frag.page = NULL; tsk->wake_q.next = NULL; @@ -378,7 +362,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -1573,6 +1573,7 @@ static int wake_futex_pi(u32 __user *uad +@@ -1497,6 +1497,7 @@ static int wake_futex_pi(u32 __user *uad struct task_struct *new_owner; bool postunlock = false; DEFINE_WAKE_Q(wake_q); @@ -386,14 +370,15 @@ Signed-off-by: Sebastian Andrzej Siewior int ret = 0; new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); -@@ -1632,13 +1633,13 @@ static int wake_futex_pi(u32 __user *uad - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); +@@ -1546,14 +1547,15 @@ static int wake_futex_pi(u32 __user *uad + * not fail. 
+ */ + pi_state_update_owner(pi_state, new_owner); +- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); ++ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, ++ &wake_sleeper_q); + } -- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); -- -+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, -+ &wake_sleeper_q); out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); @@ -403,7 +388,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -2980,7 +2981,7 @@ static int futex_lock_pi(u32 __user *uad +@@ -2857,7 +2859,7 @@ static int futex_lock_pi(u32 __user *uad goto no_block; } @@ -412,7 +397,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not -@@ -3348,7 +3349,7 @@ static int futex_wait_requeue_pi(u32 __u +@@ -3203,7 +3205,7 @@ static int futex_wait_requeue_pi(u32 __u * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -435,7 +420,7 @@ Signed-off-by: Sebastian Andrzej Siewior * * See Documentation/locking/rt-mutex-design.rst for details. */ -@@ -235,7 +240,7 @@ static inline bool unlock_rt_mutex_safe( +@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe( * Only use with rt_mutex_waiter_{less,equal}() */ #define task_to_waiter(p) \ @@ -444,7 +429,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, -@@ -275,6 +280,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa +@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa return 1; } @@ -472,7 +457,7 @@ Signed-off-by: Sebastian Andrzej Siewior static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { -@@ -379,6 +405,14 @@ static bool rt_mutex_cond_detect_deadloc +@@ -377,6 +403,14 @@ static bool rt_mutex_cond_detect_deadloc return debug_rt_mutex_detect_deadlock(waiter, chwalk); } @@ -487,7 +472,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Max number of times we'll walk the boosting chain: */ -@@ -703,13 +737,16 @@ static int rt_mutex_adjust_prio_chain(st +@@ -700,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(st * follow here. This is the end of the chain we are walking. */ if (!rt_mutex_owner(lock)) { @@ -506,7 +491,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irq(&lock->wait_lock); return 0; } -@@ -810,9 +847,11 @@ static int rt_mutex_adjust_prio_chain(st +@@ -807,9 +844,11 @@ static int rt_mutex_adjust_prio_chain(st * @task: The task which wants to acquire the lock * @waiter: The waiter that is queued to the lock's wait tree if the * callsite called task_blocked_on_lock(), otherwise NULL @@ -520,7 +505,7 @@ Signed-off-by: Sebastian Andrzej Siewior { lockdep_assert_held(&lock->wait_lock); -@@ -848,12 +887,11 @@ static int try_to_take_rt_mutex(struct r +@@ -845,12 +884,11 @@ static int try_to_take_rt_mutex(struct r */ if (waiter) { /* @@ -536,7 +521,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * We can acquire the lock. Remove the waiter from the * lock waiters tree. -@@ -871,14 +909,12 @@ static int try_to_take_rt_mutex(struct r +@@ -868,14 +906,12 @@ static int try_to_take_rt_mutex(struct r */ if (rt_mutex_has_waiters(lock)) { /* @@ -555,7 +540,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * The current top waiter stays enqueued. 
We * don't have to change anything in the lock -@@ -925,6 +961,296 @@ static int try_to_take_rt_mutex(struct r +@@ -922,6 +958,289 @@ static int try_to_take_rt_mutex(struct r return 1; } @@ -666,8 +651,6 @@ Signed-off-by: Sebastian Andrzej Siewior + + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + -+ debug_rt_mutex_print_deadlock(waiter); -+ + if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) + schedule(); + @@ -736,9 +719,9 @@ Signed-off-by: Sebastian Andrzej Siewior + +void __lockfunc rt_spin_lock(spinlock_t *lock) +{ -+ migrate_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); +} +EXPORT_SYMBOL(rt_spin_lock); + @@ -750,19 +733,28 @@ Signed-off-by: Sebastian Andrzej Siewior +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) +{ -+ migrate_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); +} +EXPORT_SYMBOL(rt_spin_lock_nested); ++ ++void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, ++ struct lockdep_map *nest_lock) ++{ ++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ migrate_disable(); ++} ++EXPORT_SYMBOL(rt_spin_lock_nest_lock); +#endif + +void __lockfunc rt_spin_unlock(spinlock_t *lock) +{ + /* NOTE: we always pass in '1' for nested, for simplicity */ -+ spin_release(&lock->dep_map, 1, _RET_IP_); -+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++ spin_release(&lock->dep_map, _RET_IP_); + migrate_enable(); ++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); +} +EXPORT_SYMBOL(rt_spin_unlock); + @@ -777,23 +769,22 @@ Signed-off-by: Sebastian Andrzej Siewior + * (like raw spinlocks do), we lock and unlock, to force the kernel to + * schedule if there's contention: + */ -+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) ++void __lockfunc rt_spin_lock_unlock(spinlock_t *lock) +{ + spin_lock(lock); + spin_unlock(lock); +} -+EXPORT_SYMBOL(rt_spin_unlock_wait); ++EXPORT_SYMBOL(rt_spin_lock_unlock); + +int __lockfunc rt_spin_trylock(spinlock_t *lock) +{ + int ret; + -+ migrate_disable(); + ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) ++ if (ret) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); ++ migrate_disable(); ++ } + return ret; +} +EXPORT_SYMBOL(rt_spin_trylock); @@ -805,27 +796,14 @@ Signed-off-by: Sebastian Andrzej Siewior + local_bh_disable(); + ret = __rt_mutex_trylock(&lock->lock); + if (ret) { -+ migrate_disable(); + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ } else ++ migrate_disable(); ++ } else { + local_bh_enable(); -+ return ret; -+} -+EXPORT_SYMBOL(rt_spin_trylock_bh); -+ -+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) -+{ -+ int ret; -+ -+ *flags = 0; -+ ret = __rt_mutex_trylock(&lock->lock); -+ if (ret) { -+ migrate_disable(); -+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } + return ret; +} -+EXPORT_SYMBOL(rt_spin_trylock_irqsave); ++EXPORT_SYMBOL(rt_spin_trylock_bh); + +void +__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) @@ -852,7 +830,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Task blocks on lock. 
* -@@ -1038,6 +1364,7 @@ static int task_blocks_on_rt_mutex(struc +@@ -1035,6 +1354,7 @@ static int task_blocks_on_rt_mutex(struc * Called with lock->wait_lock held and interrupts disabled. */ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, @@ -860,7 +838,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct rt_mutex *lock) { struct rt_mutex_waiter *waiter; -@@ -1077,7 +1404,10 @@ static void mark_wakeup_next_waiter(stru +@@ -1074,7 +1394,10 @@ static void mark_wakeup_next_waiter(stru * Pairs with preempt_enable() in rt_mutex_postunlock(); */ preempt_disable(); @@ -872,7 +850,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock(¤t->pi_lock); } -@@ -1161,21 +1491,22 @@ void rt_mutex_adjust_pi(struct task_stru +@@ -1158,21 +1481,22 @@ void rt_mutex_adjust_pi(struct task_stru return; } next_lock = waiter->lock; @@ -897,7 +875,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1292,7 +1623,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1283,7 +1607,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, unsigned long flags; int ret = 0; @@ -906,7 +884,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1365,7 +1696,8 @@ static inline int rt_mutex_slowtrylock(s +@@ -1356,7 +1680,8 @@ static inline int rt_mutex_slowtrylock(s * Return whether the current task needs to call rt_mutex_postunlock(). */ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, @@ -916,7 +894,7 @@ Signed-off-by: Sebastian Andrzej Siewior { unsigned long flags; -@@ -1419,7 +1751,7 @@ static bool __sched rt_mutex_slowunlock( +@@ -1410,7 +1735,7 @@ static bool __sched rt_mutex_slowunlock( * * Queue the next waiter for wakeup once we release the wait_lock. */ @@ -925,7 +903,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&lock->wait_lock, flags); return true; /* call rt_mutex_postunlock() */ -@@ -1471,9 +1803,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo +@@ -1447,9 +1772,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo /* * Performs the wakeup of the the top-waiter and re-enables preemption. */ @@ -938,7 +916,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ preempt_enable(); -@@ -1482,15 +1816,17 @@ void rt_mutex_postunlock(struct wake_q_h +@@ -1458,15 +1785,17 @@ void rt_mutex_postunlock(struct wake_q_h static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -959,10 +937,10 @@ Signed-off-by: Sebastian Andrzej Siewior } int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) -@@ -1668,16 +2004,13 @@ void __sched __rt_mutex_unlock(struct rt +@@ -1597,16 +1926,13 @@ void __sched __rt_mutex_unlock(struct rt void __sched rt_mutex_unlock(struct rt_mutex *lock) { - mutex_release(&lock->dep_map, 1, _RET_IP_); + mutex_release(&lock->dep_map, _RET_IP_); - rt_mutex_fastunlock(lock, rt_mutex_slowunlock); + __rt_mutex_unlock(lock); } @@ -980,7 +958,7 @@ Signed-off-by: Sebastian Andrzej Siewior { lockdep_assert_held(&lock->wait_lock); -@@ -1694,23 +2027,35 @@ bool __sched __rt_mutex_futex_unlock(str +@@ -1623,23 +1949,35 @@ bool __sched __rt_mutex_futex_unlock(str * avoid inversion prior to the wakeup. preempt_disable() * therein pairs with rt_mutex_postunlock(). 
*/ @@ -1019,7 +997,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1749,7 +2094,7 @@ void __rt_mutex_init(struct rt_mutex *lo +@@ -1675,7 +2013,7 @@ void __rt_mutex_init(struct rt_mutex *lo if (name && key) debug_rt_mutex_init(lock, name, key); } @@ -1028,18 +1006,28 @@ Signed-off-by: Sebastian Andrzej Siewior /** * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1944,6 +2289,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m - struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter) +@@ -1695,6 +2033,14 @@ void rt_mutex_init_proxy_locked(struct r + struct task_struct *proxy_owner) { -+ struct task_struct *tsk = current; - int ret; + __rt_mutex_init(lock, NULL, NULL); ++#ifdef CONFIG_DEBUG_SPINLOCK ++ /* ++ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is ++ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping ++ * lock. ++ */ ++ raw_spin_lock_init(&lock->wait_lock); ++#endif + debug_rt_mutex_proxy_lock(lock, proxy_owner); + rt_mutex_set_owner(lock, proxy_owner); + } +@@ -1717,6 +2063,26 @@ void rt_mutex_proxy_unlock(struct rt_mut + rt_mutex_set_owner(lock, NULL); + } - raw_spin_lock_irq(&lock->wait_lock); -@@ -1955,6 +2301,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m - * have to fix that up. - */ - fixup_rt_mutex_waiters(lock); ++static void fixup_rt_mutex_blocked(struct rt_mutex *lock) ++{ ++ struct task_struct *tsk = current; + /* + * RT has a problem here when the wait got interrupted by a timeout + * or a signal. task->pi_blocked_on is still set. The task must @@ -1052,35 +1040,54 @@ Signed-off-by: Sebastian Andrzej Siewior + * boosting chain of the rtmutex. That's correct because the task + * is not longer blocked on it. + */ -+ if (ret) { -+ raw_spin_lock(&tsk->pi_lock); -+ tsk->pi_blocked_on = NULL; -+ raw_spin_unlock(&tsk->pi_lock); -+ } ++ raw_spin_lock(&tsk->pi_lock); ++ tsk->pi_blocked_on = NULL; ++ raw_spin_unlock(&tsk->pi_lock); ++} ++ + /** + * __rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take +@@ -1789,6 +2155,9 @@ int __rt_mutex_start_proxy_lock(struct r + ret = 0; + } + ++ if (ret) ++ fixup_rt_mutex_blocked(lock); ++ + return ret; + } + +@@ -1878,6 +2247,9 @@ int rt_mutex_wait_proxy_lock(struct rt_m + * have to fix that up. 
+ */ + fixup_rt_mutex_waiters(lock); ++ if (ret) ++ fixup_rt_mutex_blocked(lock); + raw_spin_unlock_irq(&lock->wait_lock); return ret; --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -30,6 +30,7 @@ struct rt_mutex_waiter { - struct rb_node pi_tree_entry; struct task_struct *task; struct rt_mutex *lock; + int prio; + bool savestate; - #ifdef CONFIG_DEBUG_RT_MUTEXES - unsigned long ip; - struct pid *deadlock_task_pid; +@@ -31,6 +31,7 @@ struct rt_mutex_waiter { struct task_struct *task; struct rt_mutex *lock; + bool savestate; u64 deadline; }; -@@ -139,7 +140,7 @@ extern void rt_mutex_init_proxy_locked(s +@@ -133,7 +134,7 @@ extern struct task_struct *rt_mutex_next + extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); - extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate); extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); -@@ -157,9 +158,12 @@ extern int __rt_mutex_futex_trylock(stru +@@ -151,9 +152,12 @@ extern int __rt_mutex_futex_trylock(stru extern void rt_mutex_futex_unlock(struct rt_mutex *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, - struct wake_q_head *wqh); + struct wake_q_head *wqh, + struct wake_q_head *wq_sleeper); -void rt_mutex_postunlock(struct wake_q_head *wake_q); +void rt_mutex_postunlock(struct wake_q_head *wake_q, + struct wake_q_head *wake_sleeper_q); /* RW semaphore special interface */ extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); -@@ -169,6 +173,10 @@ int __sched rt_mutex_slowlock_locked(str +@@ -163,6 +167,10 @@ int __sched rt_mutex_slowlock_locked(str struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, struct rt_mutex_waiter *waiter); +void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + unsigned long flags); +void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); # include "rtmutex-debug.h" --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -414,9 +414,15 @@ static bool set_nr_if_polling(struct tas +@@ -502,9 +502,15 @@ static bool set_nr_if_polling(struct tas #endif #endif -void wake_q_add(struct wake_q_head *head, struct task_struct *task) +static bool __wake_q_add(struct wake_q_head *head, + struct task_struct *task, bool sleeper) { - struct wake_q_node *node = &task->wake_q; + struct wake_q_node *node; + + if (sleeper) + node = &task->wake_q_sleeper; + else + node = &task->wake_q; /* * Atomically grab the task, if ->wake_q is !nil already it means -@@ -452,7 +458,13 @@ static bool __wake_q_add(struct wake_q_h +@@ -540,7 +546,13 @@ static bool __wake_q_add(struct wake_q_h */ void wake_q_add(struct wake_q_head *head, struct task_struct *task) { - if (__wake_q_add(head, task)) + if (__wake_q_add(head, task, false)) + get_task_struct(task); +} + +void wake_q_add_sleeper(struct wake_q_head *head, struct task_struct *task) +{ + if (__wake_q_add(head, task, true)) get_task_struct(task); } -@@ -475,28 +487,39 @@ void wake_q_add(struct wake_q_head *head +@@ -563,28 +575,39 @@ void wake_q_add(struct wake_q_head *head */ void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) { diff --git a/kernel/patches-5.4.x-rt/0165-rtmutex-trylock-is-okay-on-RT.patch b/kernel/patches-5.11.x-rt/0092-0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch similarity index 56% rename from kernel/patches-5.4.x-rt/0165-rtmutex-trylock-is-okay-on-RT.patch rename to kernel/patches-5.11.x-rt/0092-0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch index b86898eba..041210144 100644 --- a/kernel/patches-5.4.x-rt/0165-rtmutex-trylock-is-okay-on-RT.patch +++ b/kernel/patches-5.11.x-rt/0092-0016-locking-rtmutex-Allow-rt_mutex_trylock-on-PREEMPT_RT.patch @@ -1,10 +1,12 @@ From: Sebastian Andrzej Siewior -Date: Wed 02 Dec 2015 11:34:07 +0100 -Subject: rtmutex: trylock is okay on -RT +Date: Wed, 2 Dec 2015 11:34:07 +0100 +Subject: [PATCH 16/22] locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT -non-RT kernel 
could deadlock on rt_mutex_trylock() in softirq context. On --RT we don't run softirqs in IRQ context but in thread context so it is -not a issue here. +Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq +context. +On PREEMPT_RT the softirq context is handled in thread context. This +avoids the deadlock in the slow path and PI-boosting will be done on the +correct thread. Signed-off-by: Sebastian Andrzej Siewior --- @@ -13,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1962,7 +1962,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); +@@ -1884,7 +1884,11 @@ int __sched __rt_mutex_futex_trylock(str int __sched __rt_mutex_trylock(struct rt_mutex *lock) { diff --git a/kernel/patches-5.4.x-rt/0166-rtmutex-add-mutex-implementation-based-on-rtmutex.patch b/kernel/patches-5.11.x-rt/0093-0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch similarity index 88% rename from kernel/patches-5.4.x-rt/0166-rtmutex-add-mutex-implementation-based-on-rtmutex.patch rename to kernel/patches-5.11.x-rt/0093-0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch index 783831268..06198cd72 100644 --- a/kernel/patches-5.4.x-rt/0166-rtmutex-add-mutex-implementation-based-on-rtmutex.patch +++ b/kernel/patches-5.11.x-rt/0093-0017-locking-rtmutex-add-mutex-implementation-based-on-rt.patch @@ -1,19 +1,21 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:17:03 +0200 -Subject: rtmutex: add mutex implementation based on rtmutex +Subject: [PATCH 17/22] locking/rtmutex: add mutex implementation based on + rtmutex Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++ - kernel/locking/mutex-rt.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 353 insertions(+) + kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 354 insertions(+) create mode 100644 include/linux/mutex_rt.h create mode 100644 kernel/locking/mutex-rt.c --- /dev/null +++ b/include/linux/mutex_rt.h @@ -0,0 +1,130 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_MUTEX_RT_H +#define __LINUX_MUTEX_RT_H + @@ -44,7 +46,6 @@ Signed-off-by: Sebastian Andrzej Siewior + +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); +extern void __lockfunc _mutex_lock(struct mutex *lock); -+extern void __lockfunc _mutex_lock_io(struct mutex *lock); +extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); @@ -61,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#define mutex_lock_killable(l) _mutex_lock_killable(l) +#define mutex_trylock(l) _mutex_trylock(l) +#define mutex_unlock(l) _mutex_unlock(l) -+#define mutex_lock_io(l) _mutex_lock_io(l); ++#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0); + +#define __mutex_owner(l) ((l)->lock.owner) + @@ -92,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior +# define mutex_lock_killable_nested(l, s) \ + _mutex_lock_killable(l) +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -+# define mutex_lock_io_nested(l, s) _mutex_lock_io(l) ++# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) +#endif + +# define mutex_init(mutex) \ @@ -146,10 +147,9 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- /dev/null +++ b/kernel/locking/mutex-rt.c -@@ -0,0 
+1,223 @@ +@@ -0,0 +1,224 @@ ++// SPDX-License-Identifier: GPL-2.0-only +/* -+ * kernel/rt.c -+ * + * Real-Time Preemption Support + * + * started by Ingo Molnar: @@ -215,6 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#include +#include +#include ++#include + +#include "rtmutex_common.h" + @@ -235,55 +236,24 @@ Signed-off-by: Sebastian Andrzej Siewior +} +EXPORT_SYMBOL(__mutex_do_init); + ++static int _mutex_lock_blk_flush(struct mutex *lock, int state) ++{ ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ return __rt_mutex_lock_state(&lock->lock, state); ++} ++ +void __lockfunc _mutex_lock(struct mutex *lock) +{ + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(_mutex_lock); + -+void __lockfunc _mutex_lock_io(struct mutex *lock) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ _mutex_lock(lock); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL_GPL(_mutex_lock_io); -+ -+int __lockfunc _mutex_lock_interruptible(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_interruptible); -+ -+int __lockfunc _mutex_lock_killable(struct mutex *lock) -+{ -+ int ret; -+ -+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); -+ if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); -+ return ret; -+} -+EXPORT_SYMBOL(_mutex_lock_killable); -+ -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) -+{ -+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); -+} -+EXPORT_SYMBOL(_mutex_lock_nested); -+ +void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) +{ + int token; @@ -297,10 +267,42 @@ Signed-off-by: Sebastian Andrzej Siewior +} +EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); + ++int __lockfunc _mutex_lock_interruptible(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible); ++ ++int __lockfunc _mutex_lock_killable(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); ++ if (ret) ++ mutex_release(&lock->dep_map, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) ++{ ++ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); ++} ++EXPORT_SYMBOL(_mutex_lock_nested); ++ +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) +{ + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); -+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); ++ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(_mutex_lock_nest_lock); + @@ -309,9 +311,9 @@ Signed-off-by: 
Sebastian Andrzej Siewior + int ret; + + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE); + if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ mutex_release(&lock->dep_map, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); @@ -321,9 +323,9 @@ Signed-off-by: Sebastian Andrzej Siewior + int ret; + + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); -+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); ++ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE); + if (ret) -+ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ mutex_release(&lock->dep_map, _RET_IP_); + return ret; +} +EXPORT_SYMBOL(_mutex_lock_killable_nested); @@ -342,7 +344,7 @@ Signed-off-by: Sebastian Andrzej Siewior + +void __lockfunc _mutex_unlock(struct mutex *lock) +{ -+ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ mutex_release(&lock->dep_map, _RET_IP_); + __rt_mutex_unlock(&lock->lock); +} +EXPORT_SYMBOL(_mutex_unlock); diff --git a/kernel/patches-5.4.x-rt/0167-rtmutex-add-rwsem-implementation-based-on-rtmutex.patch b/kernel/patches-5.11.x-rt/0094-0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch similarity index 90% rename from kernel/patches-5.4.x-rt/0167-rtmutex-add-rwsem-implementation-based-on-rtmutex.patch rename to kernel/patches-5.11.x-rt/0094-0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch index c48c56dff..2529c5f76 100644 --- a/kernel/patches-5.4.x-rt/0167-rtmutex-add-rwsem-implementation-based-on-rtmutex.patch +++ b/kernel/patches-5.11.x-rt/0094-0018-locking-rtmutex-add-rwsem-implementation-based-on-rt.patch @@ -1,6 +1,7 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:28:34 +0200 -Subject: rtmutex: add rwsem implementation based on rtmutex +Subject: [PATCH 18/22] locking/rtmutex: add rwsem implementation based on + rtmutex The RT specific R/W semaphore implementation restricts the number of readers to one because a writer cannot block on multiple readers and inherit its @@ -14,7 +15,7 @@ The single reader restricting is painful in various ways: - Progress blocker for drivers which are carefully crafted to avoid the potential reader/writer deadlock in mainline. -The analysis of the writer code pathes shows, that properly written RT tasks +The analysis of the writer code paths shows, that properly written RT tasks should not take them. Syscalls like mmap(), file access which take mmap sem write locked have unbound latencies which are completely unrelated to mmap sem. Other R/W sem users like graphics drivers are not suitable for RT tasks @@ -41,15 +42,16 @@ the approach. 
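
For orientation, the reader side of the bias scheme reduces to a single compare-and-swap on a shared counter: readers increment it while no writer is present, and a writer parks it at a bias value so that late readers fail the fastpath and divert into the rtmutex slow path. The following is a minimal user-space sketch of that idea in plain C11; the toy_* names are invented for illustration only, and the real implementation uses bias constants (cf. WRITER_BIAS in the rwlock code further down) with the opposite sign convention and blocks on the rtmutex, with priority inheritance, instead of returning failure.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int readers;	/* >= 0: number of active readers, < 0: writer holds the lock */

static bool toy_down_read_trylock(void)
{
	int r = atomic_load(&readers);

	/* No writer present: try to register as one more reader. */
	while (r >= 0) {
		/* A failed CAS reloads r, so the writer check is repeated. */
		if (atomic_compare_exchange_weak(&readers, &r, r + 1))
			return true;
	}
	/* Writer active: the real code takes the rtmutex slow path here. */
	return false;
}

static void toy_up_read(void)
{
	atomic_fetch_sub(&readers, 1);
}
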
Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/rwsem-rt.h | 68 ++++++++++ - kernel/locking/rwsem-rt.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 361 insertions(+) + include/linux/rwsem-rt.h | 70 ++++++++++ + kernel/locking/rwsem-rt.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 388 insertions(+) create mode 100644 include/linux/rwsem-rt.h create mode 100644 kernel/locking/rwsem-rt.c --- /dev/null +++ b/include/linux/rwsem-rt.h -@@ -0,0 +1,68 @@ +@@ -0,0 +1,70 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef _LINUX_RWSEM_RT_H +#define _LINUX_RWSEM_RT_H + @@ -108,6 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior +} + +extern void __down_read(struct rw_semaphore *sem); ++extern int __down_read_interruptible(struct rw_semaphore *sem); +extern int __down_read_killable(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void __down_write(struct rw_semaphore *sem); @@ -120,13 +123,13 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- /dev/null +++ b/kernel/locking/rwsem-rt.c -@@ -0,0 +1,293 @@ -+/* -+ */ +@@ -0,0 +1,318 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include ++#include + +#include "rtmutex_common.h" + @@ -211,6 +214,13 @@ Signed-off-by: Sebastian Andrzej Siewior + if (__down_read_trylock(sem)) + return 0; + ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ + might_sleep(); + raw_spin_lock_irq(&m->wait_lock); + /* @@ -280,6 +290,17 @@ Signed-off-by: Sebastian Andrzej Siewior + WARN_ON_ONCE(ret); +} + ++int __down_read_interruptible(struct rw_semaphore *sem) ++{ ++ int ret; ++ ++ ret = __down_read_common(sem, TASK_INTERRUPTIBLE); ++ if (likely(!ret)) ++ return ret; ++ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret); ++ return -EINTR; ++} ++ +int __down_read_killable(struct rw_semaphore *sem) +{ + int ret; @@ -333,6 +354,13 @@ Signed-off-by: Sebastian Andrzej Siewior + struct rt_mutex *m = &sem->rtmutex; + unsigned long flags; + ++ /* ++ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too ++ * late if one of the callbacks needs to acquire a sleeping lock. ++ */ ++ if (blk_needs_flush_plug(current)) ++ blk_schedule_flush_plug(current); ++ + /* Take the rtmutex as a first step */ + if (__rt_mutex_lock_state(m, state)) + return -EINTR; diff --git a/kernel/patches-5.4.x-rt/0168-rtmutex-add-rwlock-implementation-based-on-rtmutex.patch b/kernel/patches-5.11.x-rt/0095-0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch similarity index 86% rename from kernel/patches-5.4.x-rt/0168-rtmutex-add-rwlock-implementation-based-on-rtmutex.patch rename to kernel/patches-5.11.x-rt/0095-0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch index f65d0f0c7..99f27069c 100644 --- a/kernel/patches-5.4.x-rt/0168-rtmutex-add-rwlock-implementation-based-on-rtmutex.patch +++ b/kernel/patches-5.11.x-rt/0095-0019-locking-rtmutex-add-rwlock-implementation-based-on-r.patch @@ -1,23 +1,26 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:18:06 +0200 -Subject: rtmutex: add rwlock implementation based on rtmutex +Subject: [PATCH 19/22] locking/rtmutex: add rwlock implementation based on + rtmutex The implementation is bias-based, similar to the rwsem implementation. 
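
The write side of the same toy model is the mirror image; again an illustrative sketch with invented toy_* names, not the patch's code, which keeps the counter inside struct rt_rw_lock, sleeps on the underlying rtmutex rather than spinning, and uses the inverted bias convention visible in rt_read_can_lock() below. The writer waits for the reader count to drain to zero, then parks it at a negative bias so the reader fastpath fails; unlock restores zero.

#include <limits.h>
#include <stdatomic.h>

#define TOY_WRITER_BIAS	INT_MIN

static atomic_int readers;	/* the counter from the reader sketch above */

static void toy_write_lock(void)
{
	int expected;

	for (;;) {
		expected = 0;
		/* Succeeds only once the last reader has left. */
		if (atomic_compare_exchange_weak(&readers, &expected, TOY_WRITER_BIAS))
			return;
		/* Readers still active: the real code blocks here instead of spinning. */
	}
}

static void toy_write_unlock(void)
{
	/* Drop the bias; waiting readers and writers may enter again. */
	atomic_store(&readers, 0);
}
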
Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/rwlock_rt.h | 119 ++++++++++++ - include/linux/rwlock_types_rt.h | 55 +++++ - kernel/locking/rwlock-rt.c | 368 ++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 542 insertions(+) + include/linux/rwlock_rt.h | 109 +++++++++++++ + include/linux/rwlock_types_rt.h | 56 ++++++ + kernel/Kconfig.locks | 2 + kernel/locking/rwlock-rt.c | 328 ++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 494 insertions(+), 1 deletion(-) create mode 100644 include/linux/rwlock_rt.h create mode 100644 include/linux/rwlock_types_rt.h create mode 100644 kernel/locking/rwlock-rt.c --- /dev/null +++ b/include/linux/rwlock_rt.h -@@ -0,0 +1,119 @@ +@@ -0,0 +1,109 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_RWLOCK_RT_H +#define __LINUX_RWLOCK_RT_H + @@ -43,7 +46,6 @@ Signed-off-by: Sebastian Andrzej Siewior + +static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) +{ -+ /* XXX ARCH_IRQ_ENABLED */ + *flags = 0; + return rt_write_trylock(lock); +} @@ -126,20 +128,11 @@ Signed-off-by: Sebastian Andrzej Siewior + __rt_rwlock_init(rwl, #rwl, &__key); \ +} while (0) + -+/* -+ * Internal functions made global for CPU pinning -+ */ -+void __read_rt_lock(struct rt_rw_lock *lock); -+int __read_rt_trylock(struct rt_rw_lock *lock); -+void __write_rt_lock(struct rt_rw_lock *lock); -+int __write_rt_trylock(struct rt_rw_lock *lock); -+void __read_rt_unlock(struct rt_rw_lock *lock); -+void __write_rt_unlock(struct rt_rw_lock *lock); -+ +#endif --- /dev/null +++ b/include/linux/rwlock_types_rt.h -@@ -0,0 +1,55 @@ +@@ -0,0 +1,56 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#ifndef __LINUX_RWLOCK_TYPES_RT_H +#define __LINUX_RWLOCK_TYPES_RT_H + @@ -195,11 +188,21 @@ Signed-off-by: Sebastian Andrzej Siewior + } while (0) + +#endif +--- a/kernel/Kconfig.locks ++++ b/kernel/Kconfig.locks +@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS + + config QUEUED_RWLOCKS + def_bool y if ARCH_USE_QUEUED_RWLOCKS +- depends on SMP ++ depends on SMP && !PREEMPT_RT + + config ARCH_HAS_MMIOWB + bool --- /dev/null +++ b/kernel/locking/rwlock-rt.c -@@ -0,0 +1,368 @@ -+/* -+ */ +@@ -0,0 +1,328 @@ ++// SPDX-License-Identifier: GPL-2.0-only +#include +#include + @@ -262,7 +265,7 @@ Signed-off-by: Sebastian Andrzej Siewior + lock->rtmutex.save_state = 1; +} + -+int __read_rt_trylock(struct rt_rw_lock *lock) ++static int __read_rt_trylock(struct rt_rw_lock *lock) +{ + int r, old; + @@ -279,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior + return 0; +} + -+void __sched __read_rt_lock(struct rt_rw_lock *lock) ++static void __read_rt_lock(struct rt_rw_lock *lock) +{ + struct rt_mutex *m = &lock->rtmutex; + struct rt_mutex_waiter waiter; @@ -342,7 +345,7 @@ Signed-off-by: Sebastian Andrzej Siewior + debug_rt_mutex_free_waiter(&waiter); +} + -+void __read_rt_unlock(struct rt_rw_lock *lock) ++static void __read_rt_unlock(struct rt_rw_lock *lock) +{ + struct rt_mutex *m = &lock->rtmutex; + struct task_struct *tsk; @@ -378,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior + rt_spin_lock_slowunlock(m); +} + -+void __sched __write_rt_lock(struct rt_rw_lock *lock) ++static void __write_rt_lock(struct rt_rw_lock *lock) +{ + struct rt_mutex *m = &lock->rtmutex; + struct task_struct *self = current; @@ -422,7 +425,7 @@ Signed-off-by: Sebastian Andrzej Siewior + } +} + -+int __write_rt_trylock(struct rt_rw_lock *lock) ++static int __write_rt_trylock(struct rt_rw_lock *lock) +{ + struct rt_mutex *m = 
&lock->rtmutex; + unsigned long flags; @@ -442,7 +445,7 @@ Signed-off-by: Sebastian Andrzej Siewior + return 0; +} + -+void __write_rt_unlock(struct rt_rw_lock *lock) ++static void __write_rt_unlock(struct rt_rw_lock *lock) +{ + struct rt_mutex *m = &lock->rtmutex; + unsigned long flags; @@ -451,43 +454,6 @@ Signed-off-by: Sebastian Andrzej Siewior + __write_unlock_common(lock, WRITER_BIAS, flags); +} + -+/* Map the reader biased implementation */ -+static inline int do_read_rt_trylock(rwlock_t *rwlock) -+{ -+ return __read_rt_trylock(rwlock); -+} -+ -+static inline int do_write_rt_trylock(rwlock_t *rwlock) -+{ -+ return __write_rt_trylock(rwlock); -+} -+ -+static inline void do_read_rt_lock(rwlock_t *rwlock) -+{ -+ __read_rt_lock(rwlock); -+} -+ -+static inline void do_write_rt_lock(rwlock_t *rwlock) -+{ -+ __write_rt_lock(rwlock); -+} -+ -+static inline void do_read_rt_unlock(rwlock_t *rwlock) -+{ -+ __read_rt_unlock(rwlock); -+} -+ -+static inline void do_write_rt_unlock(rwlock_t *rwlock) -+{ -+ __write_rt_unlock(rwlock); -+} -+ -+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name, -+ struct lock_class_key *key) -+{ -+ __rwlock_biased_rt_init(rwlock, name, key); -+} -+ +int __lockfunc rt_read_can_lock(rwlock_t *rwlock) +{ + return atomic_read(&rwlock->readers) < 0; @@ -505,12 +471,11 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + int ret; + -+ migrate_disable(); -+ ret = do_read_rt_trylock(rwlock); -+ if (ret) ++ ret = __read_rt_trylock(rwlock); ++ if (ret) { + rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); ++ migrate_disable(); ++ } + return ret; +} +EXPORT_SYMBOL(rt_read_trylock); @@ -519,50 +484,49 @@ Signed-off-by: Sebastian Andrzej Siewior +{ + int ret; + -+ migrate_disable(); -+ ret = do_write_rt_trylock(rwlock); -+ if (ret) ++ ret = __write_rt_trylock(rwlock); ++ if (ret) { + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); -+ else -+ migrate_enable(); ++ migrate_disable(); ++ } + return ret; +} +EXPORT_SYMBOL(rt_write_trylock); + +void __lockfunc rt_read_lock(rwlock_t *rwlock) +{ -+ migrate_disable(); + rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); -+ do_read_rt_lock(rwlock); ++ __read_rt_lock(rwlock); ++ migrate_disable(); +} +EXPORT_SYMBOL(rt_read_lock); + +void __lockfunc rt_write_lock(rwlock_t *rwlock) +{ -+ migrate_disable(); + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); -+ do_write_rt_lock(rwlock); ++ __write_rt_lock(rwlock); ++ migrate_disable(); +} +EXPORT_SYMBOL(rt_write_lock); + +void __lockfunc rt_read_unlock(rwlock_t *rwlock) +{ -+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); -+ do_read_rt_unlock(rwlock); ++ rwlock_release(&rwlock->dep_map, _RET_IP_); + migrate_enable(); ++ __read_rt_unlock(rwlock); +} +EXPORT_SYMBOL(rt_read_unlock); + +void __lockfunc rt_write_unlock(rwlock_t *rwlock) +{ -+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); -+ do_write_rt_unlock(rwlock); ++ rwlock_release(&rwlock->dep_map, _RET_IP_); + migrate_enable(); ++ __write_rt_unlock(rwlock); +} +EXPORT_SYMBOL(rt_write_unlock); + +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) +{ -+ do_rwlock_rt_init(rwlock, name, key); ++ __rwlock_biased_rt_init(rwlock, name, key); +} +EXPORT_SYMBOL(__rt_rwlock_init); diff --git a/kernel/patches-5.4.x-rt/0169-rtmutex-wire-up-RT-s-locking.patch b/kernel/patches-5.11.x-rt/0096-0020-locking-rtmutex-wire-up-RT-s-locking.patch similarity index 73% rename from kernel/patches-5.4.x-rt/0169-rtmutex-wire-up-RT-s-locking.patch rename to 
kernel/patches-5.11.x-rt/0096-0020-locking-rtmutex-wire-up-RT-s-locking.patch index dc1637113..70e588c76 100644 --- a/kernel/patches-5.4.x-rt/0169-rtmutex-wire-up-RT-s-locking.patch +++ b/kernel/patches-5.11.x-rt/0096-0020-locking-rtmutex-wire-up-RT-s-locking.patch @@ -1,30 +1,35 @@ From: Thomas Gleixner Date: Thu, 12 Oct 2017 17:31:14 +0200 -Subject: rtmutex: wire up RT's locking +Subject: [PATCH 20/22] locking/rtmutex: wire up RT's locking Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/mutex.h | 20 +++++++++++++------- - include/linux/rwsem.h | 12 ++++++++++++ - include/linux/spinlock.h | 12 +++++++++++- - include/linux/spinlock_api_smp.h | 4 +++- - include/linux/spinlock_types.h | 11 ++++++++--- - kernel/locking/Makefile | 10 +++++++--- - kernel/locking/rwsem.c | 7 +++++++ - kernel/locking/spinlock.c | 7 +++++++ - kernel/locking/spinlock_debug.c | 5 +++++ - 9 files changed, 73 insertions(+), 15 deletions(-) + include/linux/mutex.h | 26 ++++++++++++++++---------- + include/linux/rwsem.h | 12 ++++++++++++ + include/linux/spinlock.h | 12 +++++++++++- + include/linux/spinlock_api_smp.h | 4 +++- + include/linux/spinlock_types.h | 11 ++++++++--- + include/linux/spinlock_types_up.h | 2 +- + kernel/Kconfig.preempt | 1 + + kernel/locking/Makefile | 10 +++++++--- + kernel/locking/rwsem.c | 6 ++++++ + kernel/locking/spinlock.c | 7 +++++++ + kernel/locking/spinlock_debug.c | 5 +++++ + 11 files changed, 77 insertions(+), 19 deletions(-) --- a/include/linux/mutex.h +++ b/include/linux/mutex.h -@@ -22,6 +22,17 @@ +@@ -22,6 +22,20 @@ struct ww_acquire_ctx; +#ifdef CONFIG_DEBUG_LOCK_ALLOC -+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -+ , .dep_map = { .name = #lockname } ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ ++ , .dep_map = { \ ++ .name = #lockname, \ ++ .wait_type_inner = LD_WAIT_SLEEP, \ ++ } +#else +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +#endif @@ -36,13 +41,16 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Simple, straightforward mutexes with strict semantics: * -@@ -108,13 +119,6 @@ do { \ +@@ -119,16 +133,6 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) -#ifdef CONFIG_DEBUG_LOCK_ALLOC --# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ -- , .dep_map = { .name = #lockname } +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ +- , .dep_map = { \ +- .name = #lockname, \ +- .wait_type_inner = LD_WAIT_SLEEP, \ +- } -#else -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) -#endif @@ -50,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ -@@ -210,4 +214,6 @@ enum mutex_trylock_recursive_enum { +@@ -224,4 +228,6 @@ enum mutex_trylock_recursive_enum { extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum mutex_trylock_recursive(struct mutex *lock); @@ -71,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include #endif -@@ -115,6 +120,13 @@ static inline int rwsem_is_contended(str +@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(str return !list_empty(&sem->wait_list); } @@ -87,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior */ --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h -@@ -307,7 +307,11 @@ static inline void do_raw_spin_unlock(ra +@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(ra }) /* Include rwlock functions */ @@ -100,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Pull the 
_spin_*()/_read_*()/_write_*() functions/declarations: -@@ -318,6 +322,10 @@ static inline void do_raw_spin_unlock(ra +@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(ra # include #endif @@ -111,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ -@@ -438,6 +446,8 @@ static __always_inline int spin_is_conte +@@ -454,6 +462,8 @@ static __always_inline int spin_is_conte #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) @@ -151,6 +159,27 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif #endif /* __LINUX_SPINLOCK_TYPES_H */ +--- a/include/linux/spinlock_types_up.h ++++ b/include/linux/spinlock_types_up.h +@@ -1,7 +1,7 @@ + #ifndef __LINUX_SPINLOCK_TYPES_UP_H + #define __LINUX_SPINLOCK_TYPES_UP_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H ++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H) + # error "please don't include this file directly" + #endif + +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -59,6 +59,7 @@ config PREEMPT_RT + bool "Fully Preemptible Kernel (Real-Time)" + depends on EXPERT && ARCH_SUPPORTS_RT + select PREEMPTION ++ select RT_MUTEXES + help + This option turns the kernel into a real-time kernel by replacing + various locking primitives (spinlocks, rwlocks, etc.) with --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -3,7 +3,7 @@ @@ -160,9 +189,9 @@ Signed-off-by: Sebastian Andrzej Siewior -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o +obj-y += semaphore.o rwsem.o percpu-rwsem.o - ifdef CONFIG_FUNCTION_TRACER - CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) -@@ -12,19 +12,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS + # Avoid recursion lockdep -> KCSAN -> ... -> lockdep. + KCSAN_SANITIZE_lockdep.o := n +@@ -15,19 +15,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) endif @@ -198,15 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "lock_events.h" /* -@@ -1332,6 +1333,7 @@ static struct rw_semaphore *rwsem_downgr - return sem; - } - -+ - /* - * lock for reading - */ -@@ -1482,6 +1484,7 @@ static inline void __downgrade_write(str +@@ -1343,6 +1344,7 @@ static inline void __downgrade_write(str if (tmp & RWSEM_FLAG_WAITERS) rwsem_downgrade_wake(sem); } @@ -214,36 +235,26 @@ Signed-off-by: Sebastian Andrzej Siewior /* * lock for reading -@@ -1613,6 +1616,7 @@ void _down_write_nest_lock(struct rw_sem - } - EXPORT_SYMBOL(_down_write_nest_lock); - -+#ifndef CONFIG_PREEMPT_RT - void down_read_non_owner(struct rw_semaphore *sem) +@@ -1506,7 +1508,9 @@ void down_read_non_owner(struct rw_semap { might_sleep(); -@@ -1620,6 +1624,7 @@ void down_read_non_owner(struct rw_semap + __down_read(sem); ++#ifndef CONFIG_PREEMPT_RT __rwsem_set_reader_owned(sem, NULL); ++#endif } EXPORT_SYMBOL(down_read_non_owner); -+#endif - void down_write_nested(struct rw_semaphore *sem, int subclass) - { -@@ -1644,11 +1649,13 @@ int __sched down_write_killable_nested(s - } - EXPORT_SYMBOL(down_write_killable_nested); +@@ -1535,7 +1539,9 @@ EXPORT_SYMBOL(down_write_killable_nested -+#ifndef CONFIG_PREEMPT_RT void up_read_non_owner(struct rw_semaphore *sem) { ++#ifndef CONFIG_PREEMPT_RT DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); ++#endif __up_read(sem); } EXPORT_SYMBOL(up_read_non_owner); -+#endif - - #endif --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc diff --git 
a/kernel/patches-5.4.x-rt/0170-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch b/kernel/patches-5.11.x-rt/0097-0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch similarity index 81% rename from kernel/patches-5.4.x-rt/0170-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch rename to kernel/patches-5.11.x-rt/0097-0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch index 6faa6263d..08c9e448c 100644 --- a/kernel/patches-5.4.x-rt/0170-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch +++ b/kernel/patches-5.11.x-rt/0097-0021-locking-rtmutex-add-ww_mutex-addon-for-mutex-rt.patch @@ -1,14 +1,50 @@ From: Sebastian Andrzej Siewior Date: Thu, 12 Oct 2017 17:34:38 +0200 -Subject: rtmutex: add ww_mutex addon for mutex-rt +Subject: [PATCH 21/22] locking/rtmutex: add ww_mutex addon for mutex-rt Signed-off-by: Sebastian Andrzej Siewior --- - kernel/locking/rtmutex.c | 271 ++++++++++++++++++++++++++++++++++++++-- + include/linux/mutex.h | 8 - + include/linux/ww_mutex.h | 8 + + kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++-- kernel/locking/rtmutex_common.h | 2 kernel/locking/rwsem-rt.c | 2 - 3 files changed, 261 insertions(+), 14 deletions(-) + 5 files changed, 262 insertions(+), 20 deletions(-) +--- a/include/linux/mutex.h ++++ b/include/linux/mutex.h +@@ -82,14 +82,6 @@ struct mutex { + struct ww_class; + struct ww_acquire_ctx; + +-struct ww_mutex { +- struct mutex base; +- struct ww_acquire_ctx *ctx; +-#ifdef CONFIG_DEBUG_MUTEXES +- struct ww_class *ww_class; +-#endif +-}; +- + /* + * This is the control structure for tasks blocked on mutex, + * which resides on the blocked task's kernel stack: +--- a/include/linux/ww_mutex.h ++++ b/include/linux/ww_mutex.h +@@ -28,6 +28,14 @@ struct ww_class { + unsigned int is_wait_die; + }; + ++struct ww_mutex { ++ struct mutex base; ++ struct ww_acquire_ctx *ctx; ++#ifdef CONFIG_DEBUG_MUTEXES ++ struct ww_class *ww_class; ++#endif ++}; ++ + struct ww_acquire_ctx { + struct task_struct *task; + unsigned long stamp; --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -24,6 +24,7 @@ @@ -19,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "rtmutex_common.h" -@@ -1244,6 +1245,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); +@@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init); #endif /* PREEMPT_RT */ @@ -60,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) -@@ -1522,7 +1557,8 @@ void rt_mutex_init_waiter(struct rt_mute +@@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mute static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -70,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior { int ret = 0; -@@ -1540,6 +1576,12 @@ static int __sched +@@ -1530,6 +1566,12 @@ static int __sched break; } @@ -82,8 +118,8 @@ Signed-off-by: Sebastian Andrzej Siewior + raw_spin_unlock_irq(&lock->wait_lock); - debug_rt_mutex_print_deadlock(waiter); -@@ -1574,16 +1616,106 @@ static void rt_mutex_handle_deadlock(int + schedule(); +@@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int } } @@ -191,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior set_current_state(state); -@@ -1593,14 +1725,24 @@ int __sched rt_mutex_slowlock_locked(str +@@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(str ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); @@ -219,7 +255,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /* -@@ -1617,7 
+1759,8 @@ int __sched rt_mutex_slowlock_locked(str +@@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(str static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -229,7 +265,7 @@ Signed-off-by: Sebastian Andrzej Siewior { struct rt_mutex_waiter waiter; unsigned long flags; -@@ -1635,7 +1778,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, +@@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, */ raw_spin_lock_irqsave(&lock->wait_lock, flags); @@ -239,7 +275,7 @@ Signed-off-by: Sebastian Andrzej Siewior raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -@@ -1765,29 +1909,33 @@ static bool __sched rt_mutex_slowunlock( +@@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock( */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, @@ -258,26 +294,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline int - rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx, - int (*slowfn)(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, -- enum rtmutex_chainwalk chwalk)) -+ enum rtmutex_chainwalk chwalk, -+ struct ww_acquire_ctx *ww_ctx)) - { - if (chwalk == RT_MUTEX_MIN_CHAINWALK && - likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) - return 0; - -- return slowfn(lock, state, timeout, chwalk); -+ return slowfn(lock, state, timeout, chwalk, ww_ctx); - } - - static inline int -@@ -1832,7 +1980,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc +@@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) { might_sleep(); @@ -286,15 +303,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -1952,6 +2100,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - RT_MUTEX_MIN_CHAINWALK, -+ NULL, - rt_mutex_slowlock); - if (ret) - mutex_release(&lock->dep_map, 1, _RET_IP_); -@@ -2321,7 +2470,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m +@@ -2245,7 +2391,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m raw_spin_lock_irq(&lock->wait_lock); /* sleep on the mutex */ set_current_state(TASK_INTERRUPTIBLE); @@ -303,7 +312,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might * have to fix that up. 
-@@ -2391,3 +2540,99 @@ bool rt_mutex_cleanup_proxy_lock(struct +@@ -2315,3 +2461,97 @@ bool rt_mutex_cleanup_proxy_lock(struct return cleanup; } @@ -312,7 +321,7 @@ Signed-off-by: Sebastian Andrzej Siewior +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH -+ unsigned tmp; ++ unsigned int tmp; + + if (ctx->deadlock_inject_countdown-- == 0) { + tmp = ctx->deadlock_inject_interval; @@ -347,7 +356,7 @@ Signed-off-by: Sebastian Andrzej Siewior + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, + ctx); + if (ret) -+ mutex_release(&lock->base.dep_map, 1, _RET_IP_); ++ mutex_release(&lock->base.dep_map, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + @@ -367,7 +376,7 @@ Signed-off-by: Sebastian Andrzej Siewior + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, + ctx); + if (ret) -+ mutex_release(&lock->base.dep_map, 1, _RET_IP_); ++ mutex_release(&lock->base.dep_map, _RET_IP_); + else if (!ret && ctx && ctx->acquired > 1) + return ww_mutex_deadlock_injection(lock, ctx); + @@ -377,13 +386,11 @@ Signed-off-by: Sebastian Andrzej Siewior + +void __sched ww_mutex_unlock(struct ww_mutex *lock) +{ -+ int nest = !!lock->ctx; -+ + /* + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ -+ if (nest) { ++ if (lock->ctx) { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); +#endif @@ -392,7 +399,7 @@ Signed-off-by: Sebastian Andrzej Siewior + lock->ctx = NULL; + } + -+ mutex_release(&lock->base.dep_map, nest, _RET_IP_); ++ mutex_release(&lock->base.dep_map, _RET_IP_); + __rt_mutex_unlock(&lock->base.lock); +} +EXPORT_SYMBOL(ww_mutex_unlock); @@ -405,7 +412,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h -@@ -165,6 +165,7 @@ extern void rt_mutex_postunlock(struct w +@@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct w struct wake_q_head *wake_sleeper_q); /* RW semaphore special interface */ @@ -413,7 +420,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); extern int __rt_mutex_trylock(struct rt_mutex *lock); -@@ -172,6 +173,7 @@ extern void __rt_mutex_unlock(struct rt_ +@@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_ int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, enum rtmutex_chainwalk chwalk, @@ -423,7 +430,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct rt_mutex_waiter *waiter, --- a/kernel/locking/rwsem-rt.c +++ b/kernel/locking/rwsem-rt.c -@@ -131,7 +131,7 @@ static int __sched __down_read_common(st +@@ -138,7 +138,7 @@ static int __sched __down_read_common(st */ rt_mutex_init_waiter(&waiter, false); ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK, diff --git a/kernel/patches-5.11.x-rt/0098-0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch b/kernel/patches-5.11.x-rt/0098-0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch new file mode 100644 index 000000000..82b226d7e --- /dev/null +++ b/kernel/patches-5.11.x-rt/0098-0022-locking-rtmutex-Use-custom-scheduling-function-for-s.patch @@ -0,0 +1,224 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 6 Oct 2020 13:07:17 +0200 +Subject: [PATCH 22/22] locking/rtmutex: Use custom scheduling function for + spin-schedule() + +PREEMPT_RT builds the rwsem, 
mutex, spinlock and rwlock typed locks on +top of a rtmutex lock. While blocked task->pi_blocked_on is set +(tsk_is_pi_blocked()) and task needs to schedule away while waiting. + +The schedule process must distinguish between blocking on a regular +sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock +and rwlock): +- rwsem and mutex must flush block requests (blk_schedule_flush_plug()) + even if blocked on a lock. This can not deadlock because this also + happens for non-RT. + There should be a warning if the scheduling point is within a RCU read + section. + +- spinlock and rwlock must not flush block requests. This will deadlock + if the callback attempts to acquire a lock which is already acquired. + Similarly to being preempted, there should be no warning if the + scheduling point is within a RCU read section. + +Add preempt_schedule_lock() which is invoked if scheduling is required +while blocking on a PREEMPT_RT-only sleeping lock. +Remove tsk_is_pi_blocked() from the scheduler path which is no longer +needed with the additional scheduler entry point. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm64/include/asm/preempt.h | 3 +++ + arch/x86/include/asm/preempt.h | 3 +++ + include/asm-generic/preempt.h | 3 +++ + include/linux/sched/rt.h | 8 -------- + kernel/locking/rtmutex.c | 2 +- + kernel/locking/rwlock-rt.c | 2 +- + kernel/sched/core.c | 32 +++++++++++++++++++++----------- + 7 files changed, 32 insertions(+), 21 deletions(-) + +--- a/arch/arm64/include/asm/preempt.h ++++ b/arch/arm64/include/asm/preempt.h +@@ -81,6 +81,9 @@ static inline bool should_resched(int pr + + #ifdef CONFIG_PREEMPTION + void preempt_schedule(void); ++#ifdef CONFIG_PREEMPT_RT ++void preempt_schedule_lock(void); ++#endif + #define __preempt_schedule() preempt_schedule() + void preempt_schedule_notrace(void); + #define __preempt_schedule_notrace() preempt_schedule_notrace() +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -103,6 +103,9 @@ static __always_inline bool should_resch + } + + #ifdef CONFIG_PREEMPTION ++#ifdef CONFIG_PREEMPT_RT ++ extern void preempt_schedule_lock(void); ++#endif + extern asmlinkage void preempt_schedule_thunk(void); + # define __preempt_schedule() \ + asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT) +--- a/include/asm-generic/preempt.h ++++ b/include/asm-generic/preempt.h +@@ -79,6 +79,9 @@ static __always_inline bool should_resch + } + + #ifdef CONFIG_PREEMPTION ++#ifdef CONFIG_PREEMPT_RT ++extern void preempt_schedule_lock(void); ++#endif + extern asmlinkage void preempt_schedule(void); + #define __preempt_schedule() preempt_schedule() + extern asmlinkage void preempt_schedule_notrace(void); +--- a/include/linux/sched/rt.h ++++ b/include/linux/sched/rt.h +@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mut + } + extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); + extern void rt_mutex_adjust_pi(struct task_struct *p); +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return tsk->pi_blocked_on != NULL; +-} + #else + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) + { + return NULL; + } + # define rt_mutex_adjust_pi(p) do { } while (0) +-static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +-{ +- return false; +-} + #endif + + extern void normalize_rt_tasks(void); +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -1067,7 +1067,7 @@ void __sched rt_spin_lock_slowlock_locke + 
raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) +- schedule(); ++ preempt_schedule_lock(); + + raw_spin_lock_irqsave(&lock->wait_lock, flags); + +--- a/kernel/locking/rwlock-rt.c ++++ b/kernel/locking/rwlock-rt.c +@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw + raw_spin_unlock_irqrestore(&m->wait_lock, flags); + + if (atomic_read(&lock->readers) != 0) +- schedule(); ++ preempt_schedule_lock(); + + raw_spin_lock_irqsave(&m->wait_lock, flags); + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -5006,7 +5006,7 @@ pick_next_task(struct rq *rq, struct tas + * + * WARNING: must be called with preemption disabled! + */ +-static void __sched notrace __schedule(bool preempt) ++static void __sched notrace __schedule(bool preempt, bool spinning_lock) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -5059,7 +5059,7 @@ static void __sched notrace __schedule(b + * - ptrace_{,un}freeze_traced() can change ->state underneath us. + */ + prev_state = prev->state; +- if (!preempt && prev_state) { ++ if ((!preempt || spinning_lock) && prev_state) { + if (signal_pending_state(prev_state, prev)) { + prev->state = TASK_RUNNING; + } else { +@@ -5143,7 +5143,7 @@ void __noreturn do_task_dead(void) + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + +- __schedule(false); ++ __schedule(false, false); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ +@@ -5176,9 +5176,6 @@ static inline void sched_submit_work(str + preempt_enable_no_resched(); + } + +- if (tsk_is_pi_blocked(tsk)) +- return; +- + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. +@@ -5204,7 +5201,7 @@ asmlinkage __visible void __sched schedu + sched_submit_work(tsk); + do { + preempt_disable(); +- __schedule(false); ++ __schedule(false, false); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); +@@ -5232,7 +5229,7 @@ void __sched schedule_idle(void) + */ + WARN_ON_ONCE(current->state); + do { +- __schedule(false); ++ __schedule(false, false); + } while (need_resched()); + } + +@@ -5285,7 +5282,7 @@ static void __sched notrace preempt_sche + */ + preempt_disable_notrace(); + preempt_latency_start(1); +- __schedule(true); ++ __schedule(true, false); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + +@@ -5315,6 +5312,19 @@ asmlinkage __visible void __sched notrac + NOKPROBE_SYMBOL(preempt_schedule); + EXPORT_SYMBOL(preempt_schedule); + ++#ifdef CONFIG_PREEMPT_RT ++void __sched notrace preempt_schedule_lock(void) ++{ ++ do { ++ preempt_disable(); ++ __schedule(true, true); ++ sched_preempt_enable_no_resched(); ++ } while (need_resched()); ++} ++NOKPROBE_SYMBOL(preempt_schedule_lock); ++EXPORT_SYMBOL(preempt_schedule_lock); ++#endif ++ + /** + * preempt_schedule_notrace - preempt_schedule called by tracing + * +@@ -5358,7 +5368,7 @@ asmlinkage __visible void __sched notrac + * an infinite recursion. 
+ */ + prev_ctx = exception_enter(); +- __schedule(true); ++ __schedule(true, false); + exception_exit(prev_ctx); + + preempt_latency_stop(1); +@@ -5387,7 +5397,7 @@ asmlinkage __visible void __sched preemp + do { + preempt_disable(); + local_irq_enable(); +- __schedule(true); ++ __schedule(true, false); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); diff --git a/kernel/patches-5.4.x-rt/0087-signal-revert-ptrace-preempt-magic.patch b/kernel/patches-5.11.x-rt/0099-signal-revert-ptrace-preempt-magic.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0087-signal-revert-ptrace-preempt-magic.patch rename to kernel/patches-5.11.x-rt/0099-signal-revert-ptrace-preempt-magic.patch index d673cb07a..7e95a5ae3 100644 --- a/kernel/patches-5.4.x-rt/0087-signal-revert-ptrace-preempt-magic.patch +++ b/kernel/patches-5.11.x-rt/0099-signal-revert-ptrace-preempt-magic.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -2202,16 +2202,8 @@ static void ptrace_stop(int exit_code, i +@@ -2203,16 +2203,8 @@ static void ptrace_stop(int exit_code, i if (gstop_done && ptrace_reparented(current)) do_notify_parent_cldstop(current, false, why); diff --git a/kernel/patches-5.4.x-rt/0121-preempt-nort-rt-variants.patch b/kernel/patches-5.11.x-rt/0100-preempt-nort-rt-variants.patch similarity index 96% rename from kernel/patches-5.4.x-rt/0121-preempt-nort-rt-variants.patch rename to kernel/patches-5.11.x-rt/0100-preempt-nort-rt-variants.patch index 7ec8cc03f..816e55ea5 100644 --- a/kernel/patches-5.4.x-rt/0121-preempt-nort-rt-variants.patch +++ b/kernel/patches-5.11.x-rt/0100-preempt-nort-rt-variants.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -187,7 +187,11 @@ do { \ +@@ -188,7 +188,11 @@ do { \ preempt_count_dec(); \ } while (0) @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -281,6 +285,18 @@ do { \ +@@ -282,6 +286,18 @@ do { \ set_preempt_need_resched(); \ } while (0) diff --git a/kernel/patches-5.4.x-rt/0122-mm-make-vmstat-rt-aware.patch b/kernel/patches-5.11.x-rt/0101-mm-make-vmstat-rt-aware.patch similarity index 78% rename from kernel/patches-5.4.x-rt/0122-mm-make-vmstat-rt-aware.patch rename to kernel/patches-5.11.x-rt/0101-mm-make-vmstat-rt-aware.patch index 1f303d965..12ff0598a 100644 --- a/kernel/patches-5.4.x-rt/0122-mm-make-vmstat-rt-aware.patch +++ b/kernel/patches-5.11.x-rt/0101-mm-make-vmstat-rt-aware.patch @@ -16,7 +16,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h -@@ -54,7 +54,9 @@ DECLARE_PER_CPU(struct vm_event_state, v +@@ -63,7 +63,9 @@ DECLARE_PER_CPU(struct vm_event_state, v */ static inline void __count_vm_event(enum vm_event_item item) { @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner } static inline void count_vm_event(enum vm_event_item item) -@@ -64,7 +66,9 @@ static inline void count_vm_event(enum v +@@ -73,7 +75,9 @@ static inline void count_vm_event(enum v static inline void __count_vm_events(enum vm_event_item item, long delta) { @@ -54,15 +54,15 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_zone_page_state); -@@ -341,6 +343,7 @@ void __mod_node_page_state(struct pglist - long x; - long t; +@@ -346,6 +348,7 @@ void __mod_node_page_state(struct pglist + delta >>= PAGE_SHIFT; + } + preempt_disable_rt(); x = delta + __this_cpu_read(*p); t = __this_cpu_read(pcp->stat_threshold); -@@ -350,6 
+353,7 @@ void __mod_node_page_state(struct pglist +@@ -355,6 +358,7 @@ void __mod_node_page_state(struct pglist x = 0; } __this_cpu_write(*p, x); @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__mod_node_page_state); -@@ -382,6 +386,7 @@ void __inc_zone_state(struct zone *zone, +@@ -387,6 +391,7 @@ void __inc_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -78,7 +78,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -390,6 +395,7 @@ void __inc_zone_state(struct zone *zone, +@@ -395,6 +400,7 @@ void __inc_zone_state(struct zone *zone, zone_page_state_add(v + overstep, zone, item); __this_cpu_write(*p, -overstep); } @@ -86,15 +86,15 @@ Signed-off-by: Thomas Gleixner } void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -398,6 +404,7 @@ void __inc_node_state(struct pglist_data - s8 __percpu *p = pcp->vm_node_stat_diff + item; - s8 v, t; +@@ -405,6 +411,7 @@ void __inc_node_state(struct pglist_data + + VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); + preempt_disable_rt(); v = __this_cpu_inc_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v > t)) { -@@ -406,6 +413,7 @@ void __inc_node_state(struct pglist_data +@@ -413,6 +420,7 @@ void __inc_node_state(struct pglist_data node_page_state_add(v + overstep, pgdat, item); __this_cpu_write(*p, -overstep); } @@ -102,7 +102,7 @@ Signed-off-by: Thomas Gleixner } void __inc_zone_page_state(struct page *page, enum zone_stat_item item) -@@ -426,6 +434,7 @@ void __dec_zone_state(struct zone *zone, +@@ -433,6 +441,7 @@ void __dec_zone_state(struct zone *zone, s8 __percpu *p = pcp->vm_stat_diff + item; s8 v, t; @@ -110,7 +110,7 @@ Signed-off-by: Thomas Gleixner v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -434,6 +443,7 @@ void __dec_zone_state(struct zone *zone, +@@ -441,6 +450,7 @@ void __dec_zone_state(struct zone *zone, zone_page_state_add(v - overstep, zone, item); __this_cpu_write(*p, overstep); } @@ -118,15 +118,15 @@ Signed-off-by: Thomas Gleixner } void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) -@@ -442,6 +452,7 @@ void __dec_node_state(struct pglist_data - s8 __percpu *p = pcp->vm_node_stat_diff + item; - s8 v, t; +@@ -451,6 +461,7 @@ void __dec_node_state(struct pglist_data + + VM_WARN_ON_ONCE(vmstat_item_in_bytes(item)); + preempt_disable_rt(); v = __this_cpu_dec_return(*p); t = __this_cpu_read(pcp->stat_threshold); if (unlikely(v < - t)) { -@@ -450,6 +461,7 @@ void __dec_node_state(struct pglist_data +@@ -459,6 +470,7 @@ void __dec_node_state(struct pglist_data node_page_state_add(v - overstep, pgdat, item); __this_cpu_write(*p, overstep); } diff --git a/kernel/patches-5.11.x-rt/0102-mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch b/kernel/patches-5.11.x-rt/0102-mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch new file mode 100644 index 000000000..d6b51d46c --- /dev/null +++ b/kernel/patches-5.11.x-rt/0102-mm-memcontrol-Disable-preemption-in-__mod_memcg_lruv.patch @@ -0,0 +1,37 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 28 Oct 2020 18:15:32 +0100 +Subject: [PATCH] mm/memcontrol: Disable preemption in + __mod_memcg_lruvec_state() + +The callers expect disabled preemption/interrupts while invoking +__mod_memcg_lruvec_state(). This works mainline because a lock of +somekind is acquired. 
+ +Use preempt_disable_rt() where per-CPU variables are accessed and a +stable pointer is expected. This is also done in __mod_zone_page_state() +for the same reason. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/memcontrol.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -805,6 +805,7 @@ void __mod_memcg_lruvec_state(struct lru + pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); + memcg = pn->memcg; + ++ preempt_disable_rt(); + /* Update memcg */ + __mod_memcg_state(memcg, idx, val); + +@@ -824,6 +825,7 @@ void __mod_memcg_lruvec_state(struct lru + x = 0; + } + __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x); ++ preempt_enable_rt(); + } + + /** diff --git a/kernel/patches-5.11.x-rt/0103-0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch b/kernel/patches-5.11.x-rt/0103-0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch new file mode 100644 index 000000000..c14516969 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0103-0024-xfrm-Use-sequence-counter-with-associated-spinlock.patch @@ -0,0 +1,59 @@ +From: "Ahmed S. Darwish" +Date: Wed, 10 Jun 2020 12:53:22 +0200 +Subject: [PATCH 24/24] xfrm: Use sequence counter with associated spinlock + +A sequence counter write side critical section must be protected by some +form of locking to serialize writers. A plain seqcount_t does not +contain the information of which lock must be held when entering a write +side critical section. + +Use the new seqcount_spinlock_t data type, which allows to associate a +spinlock with the sequence counter. This enables lockdep to verify that +the spinlock used for writer serialization is held when the write side +critical section is entered. + +If lockdep is disabled this lock association is compiled out and has +neither storage size nor runtime overhead. + +Upstream-status: The xfrm locking used for seqcoun writer serialization +appears to be broken. If that's the case, a proper fix will need to be +submitted upstream. (e.g. make the seqcount per network namespace?) + +Signed-off-by: Ahmed S. Darwish +Signed-off-by: Sebastian Andrzej Siewior +--- + net/xfrm/xfrm_state.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +--- a/net/xfrm/xfrm_state.c ++++ b/net/xfrm/xfrm_state.c +@@ -44,7 +44,7 @@ static void xfrm_state_gc_task(struct wo + */ + + static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +-static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); ++static __read_mostly seqcount_spinlock_t xfrm_state_hash_generation; + static struct kmem_cache *xfrm_state_cache __ro_after_init; + + static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +@@ -139,6 +139,11 @@ static void xfrm_hash_resize(struct work + return; + } + ++ /* XXX - the locking which protects the sequence counter appears ++ * to be broken here. The sequence counter is global, but the ++ * spinlock used for the sequence counter write serialization is ++ * per network namespace... 
++ */ + spin_lock_bh(&net->xfrm.xfrm_state_lock); + write_seqcount_begin(&xfrm_state_hash_generation); + +@@ -2666,6 +2671,8 @@ int __net_init xfrm_state_init(struct ne + net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + spin_lock_init(&net->xfrm.xfrm_state_lock); ++ seqcount_spinlock_init(&xfrm_state_hash_generation, ++ &net->xfrm.xfrm_state_lock); + return 0; + + out_byspi: diff --git a/kernel/patches-5.11.x-rt/0104-u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch b/kernel/patches-5.11.x-rt/0104-u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch new file mode 100644 index 000000000..4e0d4d213 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0104-u64_stats-Disable-preemption-on-32bit-UP-SMP-with-RT.patch @@ -0,0 +1,144 @@ +From: Sebastian Andrzej Siewior +Date: Mon, 17 Aug 2020 12:28:10 +0200 +Subject: [PATCH] u64_stats: Disable preemption on 32bit-UP/SMP with RT during + updates + +On RT the seqcount_t is required even on UP because the softirq can be +preempted. The IRQ handler is threaded so it is also preemptible. + +Disable preemption on 32bit-RT during value updates. There is no need to +disable interrupts on RT because the handler is run threaded. Therefore +disabling preemption is enough to guarantee that the update is not +interrupted. + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/u64_stats_sync.h | 42 +++++++++++++++++++++++++++-------------- + 1 file changed, 28 insertions(+), 14 deletions(-) + +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -66,7 +66,7 @@ + #include + + struct u64_stats_sync { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG==32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + seqcount_t seq; + #endif + }; +@@ -117,22 +117,26 @@ static inline void u64_stats_inc(u64_sta + + static inline void u64_stats_init(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + seqcount_init(&syncp->seq); + #endif + } + + static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + write_seqcount_begin(&syncp->seq); + #endif + } + + static inline void u64_stats_update_end(struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + write_seqcount_end(&syncp->seq); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + #endif + } + +@@ -141,8 +145,11 @@ u64_stats_update_begin_irqsave(struct u6 + { + unsigned long flags = 0; + +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) +- local_irq_save(flags); ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ local_irq_save(flags); + write_seqcount_begin(&syncp->seq); + #endif + return flags; +@@ -152,15 +159,18 @@ static inline void + u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp, + unsigned long flags) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + write_seqcount_end(&syncp->seq); +- local_irq_restore(flags); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++
local_irq_restore(flags); + #endif + } + + static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + return read_seqcount_begin(&syncp->seq); + #else + return 0; +@@ -169,7 +179,7 @@ static inline unsigned int __u64_stats_f + + static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) + preempt_disable(); + #endif + return __u64_stats_fetch_begin(syncp); +@@ -178,7 +188,7 @@ static inline unsigned int u64_stats_fet + static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG==32 && defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)) + return read_seqcount_retry(&syncp->seq, start); + #else + return false; +@@ -188,7 +198,7 @@ static inline bool __u64_stats_fetch_ret + static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT)) + preempt_enable(); + #endif + return __u64_stats_fetch_retry(syncp, start); +@@ -202,7 +212,9 @@ static inline bool u64_stats_fetch_retry + */ + static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) ++ preempt_disable(); ++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) + local_irq_disable(); + #endif + return __u64_stats_fetch_begin(syncp); +@@ -211,7 +223,9 @@ static inline unsigned int u64_stats_fet + static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, + unsigned int start) + { +-#if BITS_PER_LONG==32 && !defined(CONFIG_SMP) ++#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT) ++ preempt_enable(); ++#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP) + local_irq_enable(); + #endif + return __u64_stats_fetch_retry(syncp, start); diff --git a/kernel/patches-5.4.x-rt/0099-fs-dcache-use-swait_queue-instead-of-waitqueue.patch b/kernel/patches-5.11.x-rt/0105-fs-dcache-use-swait_queue-instead-of-waitqueue.patch similarity index 82% rename from kernel/patches-5.4.x-rt/0099-fs-dcache-use-swait_queue-instead-of-waitqueue.patch rename to kernel/patches-5.11.x-rt/0105-fs-dcache-use-swait_queue-instead-of-waitqueue.patch index 6a4910a93..b5f3eff05 100644 --- a/kernel/patches-5.4.x-rt/0099-fs-dcache-use-swait_queue-instead-of-waitqueue.patch +++ b/kernel/patches-5.11.x-rt/0105-fs-dcache-use-swait_queue-instead-of-waitqueue.patch @@ -14,16 +14,16 @@ Signed-off-by: Sebastian Andrzej Siewior fs/namei.c | 4 ++-- fs/nfs/dir.c | 4 ++-- fs/nfs/unlink.c | 4 ++-- - fs/proc/base.c | 2 +- + fs/proc/base.c | 3 ++- fs/proc/proc_sysctl.c | 2 +- include/linux/dcache.h | 4 ++-- include/linux/nfs_xdr.h | 2 +- kernel/sched/swait.c | 1 + - 12 files changed, 30 insertions(+), 26 deletions(-) + 12 files changed, 31 insertions(+), 26 deletions(-) --- a/fs/afs/dir_silly.c +++ b/fs/afs/dir_silly.c -@@ -202,7 +202,7 @@ int afs_silly_iput(struct dentry *dentry +@@ -236,7 +236,7 @@ int afs_silly_iput(struct dentry *dentry struct dentry *alias; int ret; @@ -34,7 +34,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- 
a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c -@@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, +@@ -82,7 +82,7 @@ cifs_prime_dcache(struct dentry *parent, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2499,21 +2499,24 @@ static inline void end_dir_add(struct in +@@ -2525,21 +2525,24 @@ static inline void end_dir_add(struct in static void d_wait_lookup(struct dentry *dentry) { @@ -81,7 +81,7 @@ Signed-off-by: Sebastian Andrzej Siewior { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); -@@ -2628,7 +2631,7 @@ void __d_lookup_done(struct dentry *dent +@@ -2654,7 +2657,7 @@ void __d_lookup_done(struct dentry *dent hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); @@ -103,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* --- a/fs/namei.c +++ b/fs/namei.c -@@ -1638,7 +1638,7 @@ static struct dentry *__lookup_slow(cons +@@ -1520,7 +1520,7 @@ static struct dentry *__lookup_slow(cons { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) -@@ -3126,7 +3126,7 @@ static int lookup_open(struct nameidata +@@ -3016,7 +3016,7 @@ static struct dentry *lookup_open(struct struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; @@ -120,19 +120,19 @@ Signed-off-by: Sebastian Andrzej Siewior + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) - return -ENOENT; + return ERR_PTR(-ENOENT); --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c -@@ -457,7 +457,7 @@ static - void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) +@@ -635,7 +635,7 @@ void nfs_prime_dcache(struct dentry *par + unsigned long dir_verifier) { struct qstr filename = QSTR_INIT(entry->name, entry->len); - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); struct dentry *dentry; struct dentry *alias; - struct inode *dir = d_inode(parent); -@@ -1517,7 +1517,7 @@ int nfs_atomic_open(struct inode *dir, s + struct inode *inode; +@@ -1859,7 +1859,7 @@ int nfs_atomic_open(struct inode *dir, s struct file *file, unsigned open_flags, umode_t mode) { @@ -152,7 +152,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include -@@ -203,7 +203,7 @@ nfs_async_unlink(struct dentry *dentry, +@@ -180,7 +180,7 @@ nfs_async_unlink(struct dentry *dentry, data->cred = get_current_cred(); data->res.dir_attr = &data->dir_attr; @@ -163,7 +163,15 @@ Signed-off-by: Sebastian Andrzej Siewior spin_lock(&dentry->d_lock); --- a/fs/proc/base.c +++ b/fs/proc/base.c -@@ -1891,7 +1891,7 @@ bool proc_fill_cache(struct file *file, +@@ -96,6 +96,7 @@ + #include + #include + #include ++#include + #include + #include "internal.h" + #include "fd.h" +@@ -2038,7 +2039,7 @@ bool proc_fill_cache(struct file *file, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -174,7 +182,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto end_instantiate; --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -702,7 +702,7 @@ static bool proc_sys_fill_cache(struct f +@@ -683,7 +683,7 @@ static bool proc_sys_fill_cache(struct f child = d_lookup(dir, &qname); if (!child) { @@ -185,7 +193,7 @@ Signed-off-by: Sebastian Andrzej Siewior return false; --- a/include/linux/dcache.h +++ b/include/linux/dcache.h -@@ 
-106,7 +106,7 @@ struct dentry { +@@ -107,7 +107,7 @@ struct dentry { union { struct list_head d_lru; /* LRU list */ @@ -194,7 +202,7 @@ Signed-off-by: Sebastian Andrzej Siewior }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ -@@ -236,7 +236,7 @@ extern void d_set_d_op(struct dentry *de +@@ -239,7 +239,7 @@ extern void d_set_d_op(struct dentry *de extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_anon(struct super_block *); extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, @@ -205,7 +213,7 @@ Signed-off-by: Sebastian Andrzej Siewior extern struct dentry * d_exact_alias(struct dentry *, struct inode *); --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h -@@ -1594,7 +1594,7 @@ struct nfs_unlinkdata { +@@ -1684,7 +1684,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; @@ -216,7 +224,7 @@ Signed-off-by: Sebastian Andrzej Siewior long timeout; --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c -@@ -51,6 +51,7 @@ void swake_up_all(struct swait_queue_hea +@@ -64,6 +64,7 @@ void swake_up_all(struct swait_queue_hea struct swait_queue *curr; LIST_HEAD(tmp); diff --git a/kernel/patches-5.4.x-rt/0095-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch b/kernel/patches-5.11.x-rt/0106-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch similarity index 87% rename from kernel/patches-5.4.x-rt/0095-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch rename to kernel/patches-5.11.x-rt/0106-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch index 04b329571..c1e2d6122 100644 --- a/kernel/patches-5.4.x-rt/0095-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch +++ b/kernel/patches-5.11.x-rt/0106-fs-dcache-disable-preemption-on-i_dir_seq-s-write-si.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/dcache.c +++ b/fs/dcache.c -@@ -2482,9 +2482,10 @@ EXPORT_SYMBOL(d_rehash); +@@ -2510,9 +2510,10 @@ EXPORT_SYMBOL(d_rehash); static inline unsigned start_dir_add(struct inode *dir) { @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior return n; cpu_relax(); } -@@ -2492,7 +2493,8 @@ static inline unsigned start_dir_add(str +@@ -2520,7 +2521,8 @@ static inline unsigned start_dir_add(str static inline void end_dir_add(struct inode *dir, unsigned n) { @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static void d_wait_lookup(struct dentry *dentry) -@@ -2525,7 +2527,7 @@ struct dentry *d_alloc_parallel(struct d +@@ -2556,7 +2558,7 @@ struct dentry *d_alloc_parallel(struct d retry: rcu_read_lock(); @@ -54,7 +54,7 @@ Signed-off-by: Sebastian Andrzej Siewior r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); if (unlikely(dentry)) { -@@ -2553,7 +2555,7 @@ struct dentry *d_alloc_parallel(struct d +@@ -2584,7 +2586,7 @@ struct dentry *d_alloc_parallel(struct d } hlist_bl_lock(b); @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/fs/inode.c +++ b/fs/inode.c @@ -157,7 +157,7 @@ int inode_init_always(struct super_block - inode->i_bdev = NULL; + inode->i_pipe = NULL; inode->i_cdev = NULL; inode->i_link = NULL; - inode->i_dir_seq = 0; @@ -76,8 +76,8 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/fs.h +++ b/include/linux/fs.h -@@ -717,7 +717,7 @@ struct inode { - struct block_device *i_bdev; +@@ -698,7 +698,7 @@ struct inode { + struct pipe_inode_info *i_pipe; struct cdev *i_cdev; char 
*i_link; - unsigned i_dir_seq; diff --git a/kernel/patches-5.4.x-rt/0091-net-Qdisc-use-a-seqlock-instead-seqcount.patch b/kernel/patches-5.11.x-rt/0107-net-Qdisc-use-a-seqlock-instead-seqcount.patch similarity index 81% rename from kernel/patches-5.4.x-rt/0091-net-Qdisc-use-a-seqlock-instead-seqcount.patch rename to kernel/patches-5.11.x-rt/0107-net-Qdisc-use-a-seqlock-instead-seqcount.patch index 49106ae5d..77172ce8a 100644 --- a/kernel/patches-5.4.x-rt/0091-net-Qdisc-use-a-seqlock-instead-seqcount.patch +++ b/kernel/patches-5.11.x-rt/0107-net-Qdisc-use-a-seqlock-instead-seqcount.patch @@ -9,35 +9,16 @@ the lock while writer is active. Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/seqlock.h | 9 +++++++++ include/net/gen_stats.h | 11 ++++++----- - include/net/net_seq_lock.h | 15 +++++++++++++++ + include/net/net_seq_lock.h | 24 ++++++++++++++++++++++++ include/net/sch_generic.h | 19 +++++++++++++++++-- net/core/gen_estimator.c | 6 +++--- net/core/gen_stats.c | 12 ++++++------ net/sched/sch_api.c | 2 +- - net/sched/sch_generic.c | 13 +++++++++++++ - 8 files changed, 70 insertions(+), 17 deletions(-) + net/sched/sch_generic.c | 10 ++++++++++ + 7 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 include/net/net_seq_lock.h ---- a/include/linux/seqlock.h -+++ b/include/linux/seqlock.h -@@ -482,6 +482,15 @@ static inline void write_seqlock(seqlock - __raw_write_seqcount_begin(&sl->seqcount); - } - -+static inline int try_write_seqlock(seqlock_t *sl) -+{ -+ if (spin_trylock(&sl->lock)) { -+ __raw_write_seqcount_begin(&sl->seqcount); -+ return 1; -+ } -+ return 0; -+} -+ - static inline void write_sequnlock(seqlock_t *sl) - { - __raw_write_seqcount_end(&sl->seqcount); --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -6,6 +6,7 @@ @@ -46,9 +27,9 @@ Signed-off-by: Sebastian Andrzej Siewior #include +#include - struct gnet_stats_basic_cpu { - struct gnet_stats_basic_packed bstats; -@@ -36,15 +37,15 @@ int gnet_stats_start_copy_compat(struct + /* Note: this used to be in include/uapi/linux/gen_stats.h */ + struct gnet_stats_basic_packed { +@@ -42,15 +43,15 @@ int gnet_stats_start_copy_compat(struct spinlock_t *lock, struct gnet_dump *d, int padattr); @@ -67,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b); -@@ -64,13 +65,13 @@ int gen_new_estimator(struct gnet_stats_ +@@ -70,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_ struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -85,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_rate_est64 *sample); --- /dev/null +++ b/include/net/net_seq_lock.h -@@ -0,0 +1,15 @@ +@@ -0,0 +1,24 @@ +#ifndef __NET_NET_SEQ_LOCK_H__ +#define __NET_NET_SEQ_LOCK_H__ + @@ -94,6 +75,15 @@ Signed-off-by: Sebastian Andrzej Siewior +# define net_seq_begin(__r) read_seqbegin(__r) +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) + ++static inline int try_write_seqlock(seqlock_t *sl) ++{ ++ if (spin_trylock(&sl->lock)) { ++ write_seqcount_begin(&sl->seqcount); ++ return 1; ++ } ++ return 0; ++} ++ +#else +# define net_seqlock_t seqcount_t +# define net_seq_begin(__r) read_seqcount_begin(__r) @@ -120,7 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -138,7 +139,11 @@ static inline bool qdisc_is_running(stru +@@ -141,7 +142,11 @@ static inline bool 
qdisc_is_running(stru { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); @@ -132,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) -@@ -162,17 +167,27 @@ static inline bool qdisc_run_begin(struc +@@ -165,17 +170,27 @@ static inline bool qdisc_run_begin(struc } else if (qdisc_is_running(qdisc)) { return false; } @@ -160,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (qdisc->flags & TCQ_F_NOLOCK) spin_unlock(&qdisc->seqlock); } -@@ -541,7 +556,7 @@ static inline spinlock_t *qdisc_root_sle +@@ -538,7 +553,7 @@ static inline spinlock_t *qdisc_root_sle return qdisc_lock(root); } @@ -189,7 +179,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct nlattr *opt) { struct gnet_estimator *parm = nla_data(opt); -@@ -223,7 +223,7 @@ int gen_replace_estimator(struct gnet_st +@@ -226,7 +226,7 @@ int gen_replace_estimator(struct gnet_st struct gnet_stats_basic_cpu __percpu *cpu_bstats, struct net_rate_estimator __rcu **rate_est, spinlock_t *lock, @@ -200,7 +190,7 @@ Signed-off-by: Sebastian Andrzej Siewior lock, running, opt); --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c -@@ -138,7 +138,7 @@ static void +@@ -137,7 +137,7 @@ static void } void @@ -209,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_stats_basic_packed *bstats, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) -@@ -151,15 +151,15 @@ void +@@ -150,15 +150,15 @@ void } do { if (running) @@ -228,7 +218,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b, -@@ -200,7 +200,7 @@ static int +@@ -204,7 +204,7 @@ static int * if the room in the socket buffer was not sufficient. */ int @@ -237,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) -@@ -224,7 +224,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); +@@ -228,7 +228,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); * if the room in the socket buffer was not sufficient. 
*/ int -gnet_stats_copy_basic_hw(const seqcount_t *running, +gnet_stats_copy_basic_hw(net_seqlock_t *running, struct gnet_dump *d, struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -1248,7 +1248,7 @@ static struct Qdisc *qdisc_create(struct +@@ -1258,7 +1258,7 @@ static struct Qdisc *qdisc_create(struct rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { @@ -259,7 +249,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (sch->flags & TCQ_F_MQROOT) { --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -557,7 +557,11 @@ struct Qdisc noop_qdisc = { +@@ -553,7 +553,11 @@ struct Qdisc noop_qdisc = { .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, @@ -271,28 +261,19 @@ Signed-off-by: Sebastian Andrzej Siewior .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), .gso_skb = { .next = (struct sk_buff *)&noop_qdisc.gso_skb, -@@ -853,7 +857,11 @@ struct Qdisc *qdisc_alloc(struct netdev_ - spin_lock_init(&sch->busylock); - /* seqlock has the same scope of busylock, for NOLOCK qdisc */ - spin_lock_init(&sch->seqlock); +@@ -845,9 +849,15 @@ struct Qdisc *qdisc_alloc(struct netdev_ + lockdep_set_class(&sch->busylock, + dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); + +#ifdef CONFIG_PREEMPT_RT + seqlock_init(&sch->running); ++ lockdep_set_class(&sch->running.lock, ++ dev->qdisc_running_key ?: &qdisc_running_key); +#else seqcount_init(&sch->running); + lockdep_set_class(&sch->running, + dev->qdisc_running_key ?: &qdisc_running_key); +#endif sch->ops = ops; sch->flags = ops->static_flags; diff --git a/kernel/patches-5.11.x-rt/0108-net-Properly-annotate-the-try-lock-for-the-seqlock.patch b/kernel/patches-5.11.x-rt/0108-net-Properly-annotate-the-try-lock-for-the-seqlock.patch new file mode 100644 index 000000000..1a0afd2b8 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0108-net-Properly-annotate-the-try-lock-for-the-seqlock.patch @@ -0,0 +1,61 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 8 Sep 2020 16:57:11 +0200 +Subject: [PATCH] net: Properly annotate the try-lock for the seqlock + +In patch + ("net/Qdisc: use a seqlock instead seqcount") + +the seqcount has been replaced with a seqlock to allow the reader to +boost the preempted writer. +The try_write_seqlock() acquired the lock with a try-lock but the +seqcount annotation was "lock". + +Opencode write_seqcount_t_begin() and use the try-lock annotation for +lockdep.
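For context on why the annotation matters: the reader side of the net_seqlock_t added by the previous patch is a plain retry loop, and on PREEMPT_RT net_seq_begin() maps to read_seqbegin(), so a reader blocked on the underlying spinlock can boost a preempted writer. A hedged sketch of such a reader (qdisc_stats_snapshot() is a hypothetical helper; running and bstats are the struct Qdisc members touched above):

```c
#include <net/sch_generic.h>
#include <net/net_seq_lock.h>

/* Take a consistent snapshot of a qdisc's basic byte/packet counters. */
static void qdisc_stats_snapshot(const struct Qdisc *q,
				 struct gnet_stats_basic_packed *snap)
{
	unsigned int seq;

	do {
		seq = net_seq_begin(&q->running);
		*snap = q->bstats;	/* a torn read is simply retried */
	} while (net_seq_retry(&q->running, seq));
}
```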
+ +Reported-by: Mike Galbraith +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior +--- + include/net/net_seq_lock.h | 9 --------- + include/net/sch_generic.h | 10 +++++++++- + 2 files changed, 9 insertions(+), 10 deletions(-) + +--- a/include/net/net_seq_lock.h ++++ b/include/net/net_seq_lock.h +@@ -6,15 +6,6 @@ + # define net_seq_begin(__r) read_seqbegin(__r) + # define net_seq_retry(__r, __s) read_seqretry(__r, __s) + +-static inline int try_write_seqlock(seqlock_t *sl) +-{ +- if (spin_trylock(&sl->lock)) { +- write_seqcount_begin(&sl->seqcount); +- return 1; +- } +- return 0; +-} +- + #else + # define net_seqlock_t seqcount_t + # define net_seq_begin(__r) read_seqcount_begin(__r) +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -171,8 +171,16 @@ static inline bool qdisc_run_begin(struc + return false; + } + #ifdef CONFIG_PREEMPT_RT +- if (try_write_seqlock(&qdisc->running)) ++ if (spin_trylock(&qdisc->running.lock)) { ++ seqcount_t *s = &qdisc->running.seqcount.seqcount; ++ /* ++ * Variant of write_seqcount_t_begin() telling lockdep that a ++ * trylock was attempted. ++ */ ++ do_raw_write_seqcount_begin(s); ++ seqcount_acquire(&s->dep_map, 0, 1, _RET_IP_); + return true; ++ } + return false; + #else + /* Variant of write_seqcount_begin() telling lockdep a trylock diff --git a/kernel/patches-5.4.x-rt/0100-kconfig-disable-a-few-options-rt.patch b/kernel/patches-5.11.x-rt/0109-kconfig-disable-a-few-options-rt.patch similarity index 90% rename from kernel/patches-5.4.x-rt/0100-kconfig-disable-a-few-options-rt.patch rename to kernel/patches-5.11.x-rt/0109-kconfig-disable-a-few-options-rt.patch index dde7bbd0d..0b34219d5 100644 --- a/kernel/patches-5.4.x-rt/0100-kconfig-disable-a-few-options-rt.patch +++ b/kernel/patches-5.11.x-rt/0109-kconfig-disable-a-few-options-rt.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/Kconfig +++ b/arch/Kconfig -@@ -31,6 +31,7 @@ config OPROFILE +@@ -37,6 +37,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE @@ -22,7 +22,7 @@ Signed-off-by: Thomas Gleixner help --- a/mm/Kconfig +++ b/mm/Kconfig -@@ -369,7 +369,7 @@ config NOMMU_INITIAL_TRIM_EXCESS +@@ -387,7 +387,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" diff --git a/kernel/patches-5.4.x-rt/0101-mm-disable-sloub-rt.patch b/kernel/patches-5.11.x-rt/0110-mm-disable-sloub-rt.patch similarity index 95% rename from kernel/patches-5.4.x-rt/0101-mm-disable-sloub-rt.patch rename to kernel/patches-5.11.x-rt/0110-mm-disable-sloub-rt.patch index 9f898aeee..4eb445af3 100644 --- a/kernel/patches-5.4.x-rt/0101-mm-disable-sloub-rt.patch +++ b/kernel/patches-5.11.x-rt/0110-mm-disable-sloub-rt.patch @@ -22,7 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/init/Kconfig +++ b/init/Kconfig -@@ -1785,6 +1785,7 @@ choice +@@ -1895,6 +1895,7 @@ choice config SLAB bool "SLAB" @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1805,6 +1806,7 @@ config SLUB +@@ -1915,6 +1916,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" diff --git a/kernel/patches-5.4.x-rt/0103-sched-disable-rt-group-sched-on-rt.patch b/kernel/patches-5.11.x-rt/0111-sched-disable-rt-group-sched-on-rt.patch similarity index 95% rename from kernel/patches-5.4.x-rt/0103-sched-disable-rt-group-sched-on-rt.patch rename to 
kernel/patches-5.11.x-rt/0111-sched-disable-rt-group-sched-on-rt.patch index 3d3762448..f0121fca6 100644 --- a/kernel/patches-5.4.x-rt/0103-sched-disable-rt-group-sched-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0111-sched-disable-rt-group-sched-on-rt.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/init/Kconfig +++ b/init/Kconfig -@@ -904,6 +904,7 @@ config CFS_BANDWIDTH +@@ -968,6 +968,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED diff --git a/kernel/patches-5.4.x-rt/0104-net_disable_NET_RX_BUSY_POLL.patch b/kernel/patches-5.11.x-rt/0112-net_disable_NET_RX_BUSY_POLL.patch similarity index 96% rename from kernel/patches-5.4.x-rt/0104-net_disable_NET_RX_BUSY_POLL.patch rename to kernel/patches-5.11.x-rt/0112-net_disable_NET_RX_BUSY_POLL.patch index 3f7d3d6fa..1b52d019f 100644 --- a/kernel/patches-5.4.x-rt/0104-net_disable_NET_RX_BUSY_POLL.patch +++ b/kernel/patches-5.11.x-rt/0112-net_disable_NET_RX_BUSY_POLL.patch @@ -26,7 +26,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/Kconfig +++ b/net/Kconfig -@@ -278,7 +278,7 @@ config CGROUP_NET_CLASSID +@@ -282,7 +282,7 @@ config CGROUP_NET_CLASSID config NET_RX_BUSY_POLL bool diff --git a/kernel/patches-5.4.x-rt/0106-efi-Disable-runtime-services-on-RT.patch b/kernel/patches-5.11.x-rt/0113-efi-Disable-runtime-services-on-RT.patch similarity index 96% rename from kernel/patches-5.4.x-rt/0106-efi-Disable-runtime-services-on-RT.patch rename to kernel/patches-5.11.x-rt/0113-efi-Disable-runtime-services-on-RT.patch index 88851ac57..2eecd1a8d 100644 --- a/kernel/patches-5.4.x-rt/0106-efi-Disable-runtime-services-on-RT.patch +++ b/kernel/patches-5.11.x-rt/0113-efi-Disable-runtime-services-on-RT.patch @@ -28,7 +28,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -68,7 +68,7 @@ struct mm_struct efi_mm = { +@@ -66,7 +66,7 @@ struct mm_struct efi_mm = { struct workqueue_struct *efi_rts_wq; diff --git a/kernel/patches-5.4.x-rt/0107-efi-Allow-efi-runtime.patch b/kernel/patches-5.11.x-rt/0114-efi-Allow-efi-runtime.patch similarity index 81% rename from kernel/patches-5.4.x-rt/0107-efi-Allow-efi-runtime.patch rename to kernel/patches-5.11.x-rt/0114-efi-Allow-efi-runtime.patch index cde5d913f..97ac4fdcc 100644 --- a/kernel/patches-5.4.x-rt/0107-efi-Allow-efi-runtime.patch +++ b/kernel/patches-5.11.x-rt/0114-efi-Allow-efi-runtime.patch @@ -13,13 +13,13 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c -@@ -94,6 +94,9 @@ static int __init parse_efi_cmdline(char +@@ -97,6 +97,9 @@ static int __init parse_efi_cmdline(char if (parse_option_str(str, "noruntime")) disable_runtime = true; + if (parse_option_str(str, "runtime")) + disable_runtime = false; + - return 0; - } - early_param("efi", parse_efi_cmdline); + if (parse_option_str(str, "nosoftreserve")) + set_bit(EFI_MEM_NO_SOFT_RESERVE, &efi.flags); + diff --git a/kernel/patches-5.11.x-rt/0115-rt-local-irq-lock.patch b/kernel/patches-5.11.x-rt/0115-rt-local-irq-lock.patch new file mode 100644 index 000000000..5a2744d22 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0115-rt-local-irq-lock.patch @@ -0,0 +1,199 @@ +Subject: rt: Add local irq locks +From: Thomas Gleixner +Date: Mon, 20 Jun 2011 09:03:47 +0200 + +Introduce locallock. For !RT this maps to preempt_disable()/ +local_irq_disable() so there is not much that changes. For RT this will +map to a spinlock. 
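A minimal usage sketch of the interface this patch extends (hypothetical per-CPU data; local_lock()/local_unlock() are the existing wrappers from <linux/local_lock.h>):

```c
#include <linux/local_lock.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(local_lock_t, stats_lock) = INIT_LOCAL_LOCK(stats_lock);
static DEFINE_PER_CPU(unsigned long, stats_count);

static void stats_inc(void)
{
	/* !RT: preempt_disable(); RT: migrate_disable() + per-CPU spinlock */
	local_lock(&stats_lock);
	__this_cpu_inc(stats_count);	/* pinned to this CPU and serialized */
	local_unlock(&stats_lock);
}
```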
This makes preemption possible and a locked "resource" +gets the lockdep annotation it wouldn't have otherwise. The locks are +recursive for owner == current. Also, all locks use migrate_disable() +which ensures that the task is not migrated to another CPU while the lock +is held and the owner is preempted. + +Signed-off-by: Thomas Gleixner +--- + include/linux/local_lock_internal.h | 126 ++++++++++++++++++++++++++++++++---- + 1 file changed, 113 insertions(+), 13 deletions(-) + +--- a/include/linux/local_lock_internal.h ++++ b/include/linux/local_lock_internal.h +@@ -7,33 +7,90 @@ + #include + + typedef struct { +-#ifdef CONFIG_DEBUG_LOCK_ALLOC ++#ifdef CONFIG_PREEMPT_RT ++ spinlock_t lock; ++ struct task_struct *owner; ++ int nestcnt; ++ ++#elif defined(CONFIG_DEBUG_LOCK_ALLOC) + struct lockdep_map dep_map; + struct task_struct *owner; + #endif + } local_lock_t; + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define LL_DEP_MAP_INIT(lockname) \ ++#ifdef CONFIG_PREEMPT_RT ++ ++#define INIT_LOCAL_LOCK(lockname) { \ ++ __SPIN_LOCK_UNLOCKED((lockname).lock), \ ++ .owner = NULL, \ ++ .nestcnt = 0, \ ++ } ++#else ++ ++# ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define LL_DEP_MAP_INIT(lockname) \ + .dep_map = { \ + .name = #lockname, \ + .wait_type_inner = LD_WAIT_CONFIG, \ + } +-#else +-# define LL_DEP_MAP_INIT(lockname) +-#endif ++# else ++# define LL_DEP_MAP_INIT(lockname) ++# endif + + #define INIT_LOCAL_LOCK(lockname) { LL_DEP_MAP_INIT(lockname) } + +-#define __local_lock_init(lock) \ ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++static inline void ___local_lock_init(local_lock_t *l) ++{ ++ l->owner = NULL; ++ l->nestcnt = 0; ++} ++ ++#define __local_lock_init(l) \ ++do { \ ++ spin_lock_init(&(l)->lock); \ ++ ___local_lock_init(l); \ ++} while (0) ++ ++#else ++ ++#define __local_lock_init(l) \ + do { \ + static struct lock_class_key __key; \ + \ +- debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ +- lockdep_init_map_wait(&(lock)->dep_map, #lock, &__key, 0, LD_WAIT_CONFIG);\ ++ debug_check_no_locks_freed((void *)l, sizeof(*l)); \ ++ lockdep_init_map_wait(&(l)->dep_map, #l, &__key, 0, LD_WAIT_CONFIG);\ + } while (0) ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++static inline void local_lock_acquire(local_lock_t *l) ++{ ++ if (l->owner != current) { ++ spin_lock(&l->lock); ++ DEBUG_LOCKS_WARN_ON(l->owner); ++ DEBUG_LOCKS_WARN_ON(l->nestcnt); ++ l->owner = current; ++ } ++ l->nestcnt++; ++} ++ ++static inline void local_lock_release(local_lock_t *l) ++{ ++ DEBUG_LOCKS_WARN_ON(l->nestcnt == 0); ++ DEBUG_LOCKS_WARN_ON(l->owner != current); ++ if (--l->nestcnt) ++ return; ++ ++ l->owner = NULL; ++ spin_unlock(&l->lock); ++} + -#ifdef CONFIG_DEBUG_LOCK_ALLOC ++#elif defined(CONFIG_DEBUG_LOCK_ALLOC) + static inline void local_lock_acquire(local_lock_t *l) + { + lock_map_acquire(&l->dep_map); +@@ -53,21 +110,50 @@ static inline void local_lock_acquire(lo + static inline void local_lock_release(local_lock_t *l) { } + #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ + ++#ifdef CONFIG_PREEMPT_RT ++ + #define __local_lock(lock) \ + do { \ +- preempt_disable(); \ ++ migrate_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + ++#define __local_unlock(lock) \ ++ do { \ ++ local_lock_release(this_cpu_ptr(lock)); \ ++ migrate_enable(); \ ++ } while (0) ++ + #define __local_lock_irq(lock) \ + do { \ +- local_irq_disable(); \ ++ migrate_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + + #define __local_lock_irqsave(lock, flags) \ + do { \ +- local_irq_save(flags); \ ++ migrate_disable(); \
++ flags = 0; \ ++ local_lock_acquire(this_cpu_ptr(lock)); \ ++ } while (0) ++ ++#define __local_unlock_irq(lock) \ ++ do { \ ++ local_lock_release(this_cpu_ptr(lock)); \ ++ migrate_enable(); \ ++ } while (0) ++ ++#define __local_unlock_irqrestore(lock, flags) \ ++ do { \ ++ local_lock_release(this_cpu_ptr(lock)); \ ++ migrate_enable(); \ ++ } while (0) ++ ++#else ++ ++#define __local_lock(lock) \ ++ do { \ ++ preempt_disable(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +@@ -77,6 +163,18 @@ static inline void local_lock_release(lo + preempt_enable(); \ + } while (0) + ++#define __local_lock_irq(lock) \ ++ do { \ ++ local_irq_disable(); \ ++ local_lock_acquire(this_cpu_ptr(lock)); \ ++ } while (0) ++ ++#define __local_lock_irqsave(lock, flags) \ ++ do { \ ++ local_irq_save(flags); \ ++ local_lock_acquire(this_cpu_ptr(lock)); \ ++ } while (0) ++ + #define __local_unlock_irq(lock) \ + do { \ + local_lock_release(this_cpu_ptr(lock)); \ +@@ -88,3 +186,5 @@ static inline void local_lock_release(lo + local_lock_release(this_cpu_ptr(lock)); \ + local_irq_restore(flags); \ + } while (0) ++ ++#endif diff --git a/kernel/patches-5.4.x-rt/0112-oleg-signal-rt-fix.patch b/kernel/patches-5.11.x-rt/0116-oleg-signal-rt-fix.patch similarity index 91% rename from kernel/patches-5.4.x-rt/0112-oleg-signal-rt-fix.patch rename to kernel/patches-5.11.x-rt/0116-oleg-signal-rt-fix.patch index de43b7eb7..52603af48 100644 --- a/kernel/patches-5.4.x-rt/0112-oleg-signal-rt-fix.patch +++ b/kernel/patches-5.11.x-rt/0116-oleg-signal-rt-fix.patch @@ -32,28 +32,12 @@ Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- - arch/x86/entry/common.c | 7 +++++++ arch/x86/include/asm/signal.h | 13 +++++++++++++ include/linux/sched.h | 4 ++++ + kernel/entry/common.c | 8 ++++++++ kernel/signal.c | 28 ++++++++++++++++++++++++++++ - 4 files changed, 52 insertions(+) + 4 files changed, 53 insertions(+) ---- a/arch/x86/entry/common.c -+++ b/arch/x86/entry/common.c -@@ -148,6 +148,13 @@ static void exit_to_usermode_loop(struct - if (cached_flags & _TIF_NEED_RESCHED) - schedule(); - -+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND -+ if (unlikely(current->forced_info.si_signo)) { -+ struct task_struct *t = current; -+ force_sig_info(&t->forced_info); -+ t->forced_info.si_signo = 0; -+ } -+#endif - if (cached_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -28,6 +28,19 @@ typedef struct { @@ -78,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -918,6 +918,10 @@ struct task_struct { +@@ -994,6 +994,10 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; @@ -89,9 +73,26 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -161,6 +161,14 @@ static unsigned long exit_to_user_mode_l + if (ti_work & _TIF_NEED_RESCHED) + schedule(); + ++#ifdef ARCH_RT_DELAYS_SIGNAL_SEND ++ if (unlikely(current->forced_info.si_signo)) { ++ struct task_struct *t = current; ++ force_sig_info(&t->forced_info); ++ t->forced_info.si_signo = 0; ++ } ++#endif ++ + if (ti_work & _TIF_UPROBE) + uprobe_notify_resume(regs); + --- a/kernel/signal.c +++ b/kernel/signal.c -@@ -1313,6 +1313,34 @@ force_sig_info_to_task(struct kernel_sig +@@ -1314,6 +1314,34 @@ force_sig_info_to_task(struct kernel_sig struct 
k_sigaction *action; int sig = info->si_signo; diff --git a/kernel/patches-5.11.x-rt/0117-add_cpu_light.patch b/kernel/patches-5.11.x-rt/0117-add_cpu_light.patch new file mode 100644 index 000000000..83568853c --- /dev/null +++ b/kernel/patches-5.11.x-rt/0117-add_cpu_light.patch @@ -0,0 +1,21 @@ +From: Sebastian Andrzej Siewior +Date: Sat, 27 May 2017 19:02:06 +0200 +Subject: kernel/sched: add {put|get}_cpu_light() + +Signed-off-by: Sebastian Andrzej Siewior +--- + include/linux/smp.h | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/include/linux/smp.h ++++ b/include/linux/smp.h +@@ -238,6 +238,9 @@ static inline int get_boot_cpu_id(void) + #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) + #define put_cpu() preempt_enable() + ++#define get_cpu_light() ({ migrate_disable(); __smp_processor_id(); }) ++#define put_cpu_light() migrate_enable() ++ + /* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: diff --git a/kernel/patches-5.11.x-rt/0118-ftrace-migrate-disable-tracing.patch b/kernel/patches-5.11.x-rt/0118-ftrace-migrate-disable-tracing.patch new file mode 100644 index 000000000..232d4ee30 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0118-ftrace-migrate-disable-tracing.patch @@ -0,0 +1,110 @@ +From: Thomas Gleixner +Date: Sun, 17 Jul 2011 21:56:42 +0200 +Subject: trace: Add migrate-disabled counter to tracing output + +Signed-off-by: Thomas Gleixner +--- + include/linux/trace_events.h | 2 ++ + kernel/trace/trace.c | 26 +++++++++++++++++++------- + kernel/trace/trace_events.c | 1 + + kernel/trace/trace_output.c | 5 +++++ + 4 files changed, 27 insertions(+), 7 deletions(-) + +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -67,6 +67,7 @@ struct trace_entry { + unsigned char flags; + unsigned char preempt_count; + int pid; ++ unsigned char migrate_disable; + }; + + #define TRACE_EVENT_TYPE_MAX \ +@@ -153,6 +154,7 @@ static inline void tracing_generic_entry + unsigned int trace_ctx) + { + entry->preempt_count = trace_ctx & 0xff; ++ entry->migrate_disable = (trace_ctx >> 8) & 0xff; + entry->pid = current->pid; + entry->type = type; + entry->flags = trace_ctx >> 16; +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2578,6 +2578,15 @@ enum print_line_t trace_handle_return(st + } + EXPORT_SYMBOL_GPL(trace_handle_return); + ++static unsigned short migration_disable_value(void) ++{ ++#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) ++ return current->migration_disabled; ++#else ++ return 0; ++#endif ++} ++ + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + { + unsigned int trace_flags = irqs_status; +@@ -2596,7 +2605,8 @@ unsigned int tracing_gen_ctx_irq_test(un + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; +- return (trace_flags << 16) | (pc & 0xff); ++ return (trace_flags << 16) | (pc & 0xff) | ++ (migration_disable_value() & 0xff) << 8; + } + + struct ring_buffer_event * +@@ -3802,9 +3812,10 @@ static void print_lat_help_header(struct + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" +- "# |||| / delay \n" +- "# cmd pid ||||| time | caller \n" +- "# \\ / ||||| \\ | / \n"); ++ "# |||| / _-=> migrate-disable \n" ++ "# ||||| / delay \n" ++ "# cmd pid |||||| time | caller \n" ++ "# \\ / |||||| \\ | / \n"); + } + + static void print_event_info(struct array_buffer *buf, struct seq_file *m) +@@ -3842,9 +3853,10 @@ static void print_func_help_header_irq(s + 
seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); +- seq_printf(m, "# %.*s||| / delay\n", prec, space); +- seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); +- seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); ++ seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); ++ seq_printf(m, "# %.*s|||| / delay\n", prec, space); ++ seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); ++ seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); + } + + void +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -183,6 +183,7 @@ static int trace_define_common_fields(vo + __common_field(unsigned char, flags); + __common_field(unsigned char, preempt_count); + __common_field(int, pid); ++ __common_field(unsigned char, migrate_disable); + + return ret; + } +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -487,6 +487,11 @@ int trace_print_lat_fmt(struct trace_seq + else + trace_seq_putc(s, '.'); + ++ if (entry->migrate_disable) ++ trace_seq_printf(s, "%x", entry->migrate_disable); ++ else ++ trace_seq_putc(s, '.'); ++ + return !trace_seq_has_overflowed(s); + } + diff --git a/kernel/patches-5.4.x-rt/0183-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch b/kernel/patches-5.11.x-rt/0119-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch similarity index 89% rename from kernel/patches-5.4.x-rt/0183-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch rename to kernel/patches-5.11.x-rt/0119-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch index 4f8273190..0993143d3 100644 --- a/kernel/patches-5.4.x-rt/0183-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch +++ b/kernel/patches-5.11.x-rt/0119-locking-don-t-check-for-__LINUX_SPINLOCK_TYPES_H-on-.patch @@ -22,8 +22,7 @@ Signed-off-by: Sebastian Andrzej Siewior arch/s390/include/asm/spinlock_types.h | 4 ---- arch/sh/include/asm/spinlock_types.h | 4 ---- arch/xtensa/include/asm/spinlock_types.h | 4 ---- - include/linux/spinlock_types_up.h | 4 ---- - 10 files changed, 40 deletions(-) + 9 files changed, 36 deletions(-) --- a/arch/alpha/include/asm/spinlock_types.h +++ b/arch/alpha/include/asm/spinlock_types.h @@ -100,9 +99,9 @@ Signed-off-by: Sebastian Andrzej Siewior -# error "please don't include this file directly" -#endif - - typedef struct { - volatile unsigned int slock; - } arch_spinlock_t; + #ifdef CONFIG_PPC_QUEUED_SPINLOCKS + #include + #include --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -2,10 +2,6 @@ @@ -142,16 +141,3 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include ---- a/include/linux/spinlock_types_up.h -+++ b/include/linux/spinlock_types_up.h -@@ -1,10 +1,6 @@ - #ifndef __LINUX_SPINLOCK_TYPES_UP_H - #define __LINUX_SPINLOCK_TYPES_UP_H - --#ifndef __LINUX_SPINLOCK_TYPES_H --# error "please don't include this file directly" --#endif -- - /* - * include/linux/spinlock_types_up.h - spinlock type definitions for UP - * diff --git a/kernel/patches-5.4.x-rt/0115-0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch b/kernel/patches-5.11.x-rt/0120-0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch similarity index 77% rename from kernel/patches-5.4.x-rt/0115-0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch rename to 
kernel/patches-5.11.x-rt/0120-0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch index 5b7fbc5c0..fbdef8c36 100644 --- a/kernel/patches-5.4.x-rt/0115-0003-mm-SLxB-change-list_lock-to-raw_spinlock_t.patch +++ b/kernel/patches-5.11.x-rt/0120-0001-mm-sl-au-b-Change-list_lock-to-raw_spinlock_t.patch @@ -1,11 +1,11 @@ From: Thomas Gleixner Date: Mon, 28 May 2018 15:24:22 +0200 -Subject: [PATCH 3/4] mm/SLxB: change list_lock to raw_spinlock_t +Subject: [PATCH 1/8] mm: sl[au]b: Change list_lock to raw_spinlock_t -The list_lock is used with used with IRQs off on RT. Make it a raw_spinlock_t -otherwise the interrupts won't be disabled on -RT. The locking rules remain -the same on !RT. -This patch changes it for SLAB and SLUB since both share the same header +The list_lock is used with IRQs off on PREEMPT_RT. Make it a +raw_spinlock_t otherwise the interrupts won't be disabled on PREEMPT_RT. +The locking rules remain unchanged. +The lock is updated for SLAB and SLUB since both share the same header file for struct kmem_cache_node defintion. Signed-off-by: Thomas Gleixner @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -688,7 +688,7 @@ static void __drain_alien_cache(struct k +@@ -698,7 +698,7 @@ static void __drain_alien_cache(struct k struct kmem_cache_node *n = get_node(cachep, node); if (ac->avail) { @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Stuff objects into the remote nodes shared array first. * That way we could avoid the overhead of putting the objects -@@ -699,7 +699,7 @@ static void __drain_alien_cache(struct k +@@ -709,7 +709,7 @@ static void __drain_alien_cache(struct k free_block(cachep, ac->entry, ac->avail, node, list); ac->avail = 0; @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -772,9 +772,9 @@ static int __cache_free_alien(struct kme +@@ -782,9 +782,9 @@ static int __cache_free_alien(struct kme slabs_destroy(cachep, &list); } else { n = get_node(cachep, page_node); @@ -69,7 +69,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } return 1; -@@ -815,10 +815,10 @@ static int init_cache_node(struct kmem_c +@@ -825,10 +825,10 @@ static int init_cache_node(struct kmem_c */ n = get_node(cachep, node); if (n) { @@ -82,7 +82,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } -@@ -897,7 +897,7 @@ static int setup_kmem_cache_node(struct +@@ -907,7 +907,7 @@ static int setup_kmem_cache_node(struct goto fail; n = get_node(cachep, node); @@ -91,7 +91,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (n->shared && force_change) { free_block(cachep, n->shared->entry, n->shared->avail, node, &list); -@@ -915,7 +915,7 @@ static int setup_kmem_cache_node(struct +@@ -925,7 +925,7 @@ static int setup_kmem_cache_node(struct new_alien = NULL; } @@ -100,7 +100,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); /* -@@ -954,7 +954,7 @@ static void cpuup_canceled(long cpu) +@@ -964,7 +964,7 @@ static void cpuup_canceled(long cpu) if (!n) continue; @@ -109,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Free limit for this kmem_cache_node */ n->free_limit -= cachep->batchcount; -@@ -965,7 +965,7 @@ static void cpuup_canceled(long cpu) +@@ -975,7 +975,7 @@ static void cpuup_canceled(long cpu) nc->avail = 0; if (!cpumask_empty(mask)) { @@ -118,7 +118,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto free_slab; } -@@ -979,7 +979,7 @@ static void cpuup_canceled(long cpu) +@@ -989,7 +989,7 @@ static void cpuup_canceled(long cpu) alien =
n->alien; n->alien = NULL; @@ -127,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior kfree(shared); if (alien) { -@@ -1163,7 +1163,7 @@ static void __init init_list(struct kmem +@@ -1173,7 +1173,7 @@ static void __init init_list(struct kmem /* * Do not assume that spinlocks can be initialized via memcpy: */ @@ -136,7 +136,7 @@ Signed-off-by: Sebastian Andrzej Siewior MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->node[nodeid] = ptr; -@@ -1334,11 +1334,11 @@ slab_out_of_memory(struct kmem_cache *ca +@@ -1344,11 +1344,11 @@ slab_out_of_memory(struct kmem_cache *ca for_each_kmem_cache_node(cachep, node, n) { unsigned long total_slabs, free_slabs, free_objs; @@ -150,7 +150,7 @@ Signed-off-by: Sebastian Andrzej Siewior pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld\n", node, total_slabs - free_slabs, total_slabs, -@@ -2096,7 +2096,7 @@ static void check_spinlock_acquired(stru +@@ -2107,7 +2107,7 @@ static void check_spinlock_acquired(stru { #ifdef CONFIG_SMP check_irq_off(); @@ -159,7 +159,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif } -@@ -2104,7 +2104,7 @@ static void check_spinlock_acquired_node +@@ -2115,7 +2115,7 @@ static void check_spinlock_acquired_node { #ifdef CONFIG_SMP check_irq_off(); @@ -168,7 +168,7 @@ Signed-off-by: Sebastian Andrzej Siewior #endif } -@@ -2144,9 +2144,9 @@ static void do_drain(void *arg) +@@ -2155,9 +2155,9 @@ static void do_drain(void *arg) check_irq_off(); ac = cpu_cache_get(cachep); n = get_node(cachep, node); @@ -177,10 +177,10 @@ Signed-off-by: Sebastian Andrzej Siewior free_block(cachep, ac->entry, ac->avail, node, &list); - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail = 0; + slabs_destroy(cachep, &list); } -@@ -2164,9 +2164,9 @@ static void drain_cpu_caches(struct kmem +@@ -2175,9 +2175,9 @@ static void drain_cpu_caches(struct kmem drain_alien_cache(cachep, n->alien); for_each_kmem_cache_node(cachep, node, n) { @@ -192,7 +192,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -2188,10 +2188,10 @@ static int drain_freelist(struct kmem_ca +@@ -2199,10 +2199,10 @@ static int drain_freelist(struct kmem_ca nr_freed = 0; while (nr_freed < tofree && !list_empty(&n->slabs_free)) { @@ -205,7 +205,7 @@ Signed-off-by: Sebastian Andrzej Siewior goto out; } -@@ -2204,7 +2204,7 @@ static int drain_freelist(struct kmem_ca +@@ -2215,7 +2215,7 @@ static int drain_freelist(struct kmem_ca * to the cache. 
*/ n->free_objects -= cache->num; @@ -214,7 +214,7 @@ Signed-off-by: Sebastian Andrzej Siewior slab_destroy(cache, page); nr_freed++; } -@@ -2657,7 +2657,7 @@ static void cache_grow_end(struct kmem_c +@@ -2651,7 +2651,7 @@ static void cache_grow_end(struct kmem_c INIT_LIST_HEAD(&page->slab_list); n = get_node(cachep, page_to_nid(page)); @@ -223,7 +223,7 @@ Signed-off-by: Sebastian Andrzej Siewior n->total_slabs++; if (!page->active) { list_add_tail(&page->slab_list, &n->slabs_free); -@@ -2667,7 +2667,7 @@ static void cache_grow_end(struct kmem_c +@@ -2661,7 +2661,7 @@ static void cache_grow_end(struct kmem_c STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; @@ -232,7 +232,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); } -@@ -2833,7 +2833,7 @@ static struct page *get_first_slab(struc +@@ -2827,7 +2827,7 @@ static struct page *get_first_slab(struc { struct page *page; @@ -241,7 +241,7 @@ Signed-off-by: Sebastian Andrzej Siewior page = list_first_entry_or_null(&n->slabs_partial, struct page, slab_list); if (!page) { -@@ -2860,10 +2860,10 @@ static noinline void *cache_alloc_pfmema +@@ -2854,10 +2854,10 @@ static noinline void *cache_alloc_pfmema if (!gfp_pfmemalloc_allowed(flags)) return NULL; @@ -254,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior return NULL; } -@@ -2872,7 +2872,7 @@ static noinline void *cache_alloc_pfmema +@@ -2866,7 +2866,7 @@ static noinline void *cache_alloc_pfmema fixup_slab_list(cachep, n, page, &list); @@ -263,7 +263,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); return obj; -@@ -2931,7 +2931,7 @@ static void *cache_alloc_refill(struct k +@@ -2925,7 +2925,7 @@ static void *cache_alloc_refill(struct k if (!n->free_objects && (!shared || !shared->avail)) goto direct_grow; @@ -272,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior shared = READ_ONCE(n->shared); /* See if we can refill from the shared array */ -@@ -2955,7 +2955,7 @@ static void *cache_alloc_refill(struct k +@@ -2949,7 +2949,7 @@ static void *cache_alloc_refill(struct k must_grow: n->free_objects -= ac->avail; alloc_done: @@ -281,7 +281,7 @@ Signed-off-by: Sebastian Andrzej Siewior fixup_objfreelist_debug(cachep, &list); direct_grow: -@@ -3180,7 +3180,7 @@ static void *____cache_alloc_node(struct +@@ -3174,7 +3174,7 @@ static void *____cache_alloc_node(struct BUG_ON(!n); check_irq_off(); @@ -290,7 +290,7 @@ Signed-off-by: Sebastian Andrzej Siewior page = get_first_slab(n, false); if (!page) goto must_grow; -@@ -3198,12 +3198,12 @@ static void *____cache_alloc_node(struct +@@ -3192,12 +3192,12 @@ static void *____cache_alloc_node(struct fixup_slab_list(cachep, n, page, &list); @@ -305,7 +305,7 @@ Signed-off-by: Sebastian Andrzej Siewior page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid); if (page) { /* This slab isn't counted yet so don't update free_objects */ -@@ -3379,7 +3379,7 @@ static void cache_flusharray(struct kmem +@@ -3375,7 +3375,7 @@ static void cache_flusharray(struct kmem check_irq_off(); n = get_node(cachep, node); @@ -314,16 +314,16 @@ Signed-off-by: Sebastian Andrzej Siewior if (n->shared) { struct array_cache *shared_array = n->shared; int max = shared_array->limit - shared_array->avail; -@@ -3408,7 +3408,7 @@ static void cache_flusharray(struct kmem +@@ -3404,7 +3404,7 @@ static void cache_flusharray(struct kmem STATS_SET_FREEABLE(cachep, i); } #endif - spin_unlock(&n->list_lock); + raw_spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail -= 
batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); -@@ -3830,9 +3830,9 @@ static int __do_tune_cpucache(struct kme + slabs_destroy(cachep, &list); +@@ -3834,9 +3834,9 @@ static int do_tune_cpucache(struct kmem_ node = cpu_to_mem(cpu); n = get_node(cachep, node); @@ -335,7 +335,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } free_percpu(prev); -@@ -3957,9 +3957,9 @@ static void drain_array(struct kmem_cach +@@ -3931,9 +3931,9 @@ static void drain_array(struct kmem_cach return; } @@ -347,7 +347,7 @@ Signed-off-by: Sebastian Andrzej Siewior slabs_destroy(cachep, &list); } -@@ -4043,7 +4043,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4017,7 +4017,7 @@ void get_slabinfo(struct kmem_cache *cac for_each_kmem_cache_node(cachep, node, n) { check_irq_on(); @@ -356,7 +356,7 @@ Signed-off-by: Sebastian Andrzej Siewior total_slabs += n->total_slabs; free_slabs += n->free_slabs; -@@ -4052,7 +4052,7 @@ void get_slabinfo(struct kmem_cache *cac +@@ -4026,7 +4026,7 @@ void get_slabinfo(struct kmem_cache *cac if (n->shared) shared_avail += n->shared->avail; @@ -367,7 +367,7 @@ Signed-off-by: Sebastian Andrzej Siewior active_slabs = total_slabs - free_slabs; --- a/mm/slab.h +++ b/mm/slab.h -@@ -596,7 +596,7 @@ static inline void slab_post_alloc_hook( +@@ -523,7 +523,7 @@ static inline void slab_post_alloc_hook( * The slab lists for all objects. */ struct kmem_cache_node { @@ -378,8 +378,8 @@ Signed-off-by: Sebastian Andrzej Siewior struct list_head slabs_partial; /* partial list first, better asm code */ --- a/mm/slub.c +++ b/mm/slub.c -@@ -1176,7 +1176,7 @@ static noinline int free_debug_processin - unsigned long uninitialized_var(flags); +@@ -1216,7 +1216,7 @@ static noinline int free_debug_processin + unsigned long flags; int ret = 0; - spin_lock_irqsave(&n->list_lock, flags); @@ -387,7 +387,7 @@ Signed-off-by: Sebastian Andrzej Siewior slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1211,7 +1211,7 @@ static noinline int free_debug_processin +@@ -1251,7 +1251,7 @@ static noinline int free_debug_processin bulk_cnt, cnt); slab_unlock(page); @@ -396,7 +396,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1849,7 +1849,7 @@ static void *get_partial_node(struct kme +@@ -1964,7 +1964,7 @@ static void *get_partial_node(struct kme if (!n || !n->nr_partial) return NULL; @@ -405,7 +405,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, page2, &n->partial, slab_list) { void *t; -@@ -1874,7 +1874,7 @@ static void *get_partial_node(struct kme +@@ -1989,7 +1989,7 @@ static void *get_partial_node(struct kme break; } @@ -414,7 +414,7 @@ Signed-off-by: Sebastian Andrzej Siewior return object; } -@@ -2120,7 +2120,7 @@ static void deactivate_slab(struct kmem_ +@@ -2243,7 +2243,7 @@ static void deactivate_slab(struct kmem_ * that acquire_slab() will see a slab page that * is frozen */ @@ -423,7 +423,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } else { m = M_FULL; -@@ -2131,7 +2131,7 @@ static void deactivate_slab(struct kmem_ +@@ -2254,7 +2254,7 @@ static void deactivate_slab(struct kmem_ * slabs from diagnostic functions will not see * any frozen slabs. 
*/ @@ -432,7 +432,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -2155,7 +2155,7 @@ static void deactivate_slab(struct kmem_ +@@ -2278,7 +2278,7 @@ static void deactivate_slab(struct kmem_ goto redo; if (lock) @@ -441,7 +441,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (m == M_PARTIAL) stat(s, tail); -@@ -2194,10 +2194,10 @@ static void unfreeze_partials(struct kme +@@ -2317,10 +2317,10 @@ static void unfreeze_partials(struct kme n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -454,7 +454,7 @@ Signed-off-by: Sebastian Andrzej Siewior } do { -@@ -2226,7 +2226,7 @@ static void unfreeze_partials(struct kme +@@ -2349,7 +2349,7 @@ static void unfreeze_partials(struct kme } if (n) @@ -463,7 +463,7 @@ Signed-off-by: Sebastian Andrzej Siewior while (discard_page) { page = discard_page; -@@ -2393,10 +2393,10 @@ static unsigned long count_partial(struc +@@ -2516,10 +2516,10 @@ static unsigned long count_partial(struc unsigned long x = 0; struct page *page; @@ -476,7 +476,7 @@ Signed-off-by: Sebastian Andrzej Siewior return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2856,7 +2856,7 @@ static void __slab_free(struct kmem_cach +@@ -2979,7 +2979,7 @@ static void __slab_free(struct kmem_cach do { if (unlikely(n)) { @@ -485,7 +485,7 @@ Signed-off-by: Sebastian Andrzej Siewior n = NULL; } prior = page->freelist; -@@ -2888,7 +2888,7 @@ static void __slab_free(struct kmem_cach +@@ -3011,7 +3011,7 @@ static void __slab_free(struct kmem_cach * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. */ @@ -494,7 +494,7 @@ Signed-off-by: Sebastian Andrzej Siewior } } -@@ -2929,7 +2929,7 @@ static void __slab_free(struct kmem_cach +@@ -3053,7 +3053,7 @@ static void __slab_free(struct kmem_cach add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -503,7 +503,7 @@ Signed-off-by: Sebastian Andrzej Siewior return; slab_empty: -@@ -2944,7 +2944,7 @@ static void __slab_free(struct kmem_cach +@@ -3068,7 +3068,7 @@ static void __slab_free(struct kmem_cach remove_full(s, n, page); } @@ -512,7 +512,7 @@ Signed-off-by: Sebastian Andrzej Siewior stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3348,7 +3348,7 @@ static void +@@ -3487,7 +3487,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -521,7 +521,7 @@ Signed-off-by: Sebastian Andrzej Siewior INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3729,7 +3729,7 @@ static void free_partial(struct kmem_cac +@@ -3888,7 +3888,7 @@ static void free_partial(struct kmem_cac struct page *page, *h; BUG_ON(irqs_disabled()); @@ -530,8 +530,8 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, h, &n->partial, slab_list) { if (!page->inuse) { remove_partial(n, page); -@@ -3739,7 +3739,7 @@ static void free_partial(struct kmem_cac - "Objects remaining in %s on __kmem_cache_shutdown()"); +@@ -3898,7 +3898,7 @@ static void free_partial(struct kmem_cac + "Objects remaining in %s on __kmem_cache_shutdown()"); } } - spin_unlock_irq(&n->list_lock); @@ -539,7 +539,7 @@ Signed-off-by: Sebastian Andrzej Siewior list_for_each_entry_safe(page, h, &discard, slab_list) discard_slab(s, page); -@@ -4011,7 +4011,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -4169,7 +4169,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -548,7 +548,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Build lists of slabs to discard or promote. 
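The conversion above leans on a pattern worth spelling out: slabs are only moved onto a private list while the raw (non-sleeping) list_lock is held, and the actual freeing happens after the lock is dropped and interrupts are enabled again. A hedged sketch with hypothetical names (the types and discard_slab() mirror mm/slub.c internals; bookkeeping such as n->nr_partial is elided):

```c
#include <linux/list.h>
#include <linux/spinlock.h>

static void shrink_partial_list(struct kmem_cache *s, struct kmem_cache_node *n)
{
	struct page *page, *tmp;
	unsigned long flags;
	LIST_HEAD(discard);

	raw_spin_lock_irqsave(&n->list_lock, flags);	/* really IRQ-off, even on RT */
	list_for_each_entry_safe(page, tmp, &n->partial, slab_list) {
		if (!page->inuse)
			list_move(&page->slab_list, &discard);
	}
	raw_spin_unlock_irqrestore(&n->list_lock, flags);

	/* Interrupts are on again; freeing may take sleeping locks on RT. */
	list_for_each_entry_safe(page, tmp, &discard, slab_list)
		discard_slab(s, page);
}
```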
-@@ -4042,7 +4042,7 @@ int __kmem_cache_shrink(struct kmem_cach +@@ -4200,7 +4200,7 @@ int __kmem_cache_shrink(struct kmem_cach for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); - spin_unlock_irqrestore(&n->list_lock, flags); + raw_spin_unlock_irqrestore(&n->list_lock, flags); /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, slab_list) -@@ -4450,7 +4450,7 @@ static int validate_slab_node(struct kme +@@ -4562,7 +4562,7 @@ static int validate_slab_node(struct kme struct page *page; unsigned long flags; - spin_lock_irqsave(&n->list_lock, flags); + raw_spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, slab_list) { - validate_slab_slab(s, page, map); + validate_slab(s, page); -@@ -4472,7 +4472,7 @@ static int validate_slab_node(struct kme +@@ -4584,7 +4584,7 @@ static int validate_slab_node(struct kme s->name, count, atomic_long_read(&n->nr_slabs)); out: - spin_unlock_irqrestore(&n->list_lock, flags); + raw_spin_unlock_irqrestore(&n->list_lock, flags); return count; } -@@ -4658,12 +4658,12 @@ static int list_locations(struct kmem_ca +@@ -4763,12 +4763,12 @@ static int list_locations(struct kmem_ca if (!atomic_long_read(&n->nr_slabs)) continue; - spin_lock_irqsave(&n->list_lock, flags); + raw_spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, slab_list) - process_slab(&t, s, page, alloc, map); + process_slab(&t, s, page, alloc); list_for_each_entry(page, &n->full, slab_list) - process_slab(&t, s, page, alloc, map); + process_slab(&t, s, page, alloc); - spin_unlock_irqrestore(&n->list_lock, flags); + raw_spin_unlock_irqrestore(&n->list_lock, flags); } diff --git a/kernel/patches-5.11.x-rt/0121-0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch b/kernel/patches-5.11.x-rt/0121-0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch new file mode 100644 index 000000000..6f9805aed --- /dev/null +++ b/kernel/patches-5.11.x-rt/0121-0002-mm-slub-Make-object_map_lock-a-raw_spinlock_t.patch @@ -0,0 +1,43 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 16 Jul 2020 18:47:50 +0200 +Subject: [PATCH 2/8] mm: slub: Make object_map_lock a raw_spinlock_t + +The variable object_map is protected by object_map_lock. The lock is always +acquired in debug code and within an already atomic context. + +Make object_map_lock a raw_spinlock_t. + +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/slub.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -436,7 +436,7 @@ static inline bool cmpxchg_double_slab(s + + #ifdef CONFIG_SLUB_DEBUG + static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; +-static DEFINE_SPINLOCK(object_map_lock); ++static DEFINE_RAW_SPINLOCK(object_map_lock); + + /* + * Determine a map of object in use on a page.
+@@ -452,7 +452,7 @@ static unsigned long *get_map(struct kme + + VM_BUG_ON(!irqs_disabled()); + +- spin_lock(&object_map_lock); ++ raw_spin_lock(&object_map_lock); + + bitmap_zero(object_map, page->objects); + +@@ -465,7 +465,7 @@ static unsigned long *get_map(struct kme + static void put_map(unsigned long *map) __releases(&object_map_lock) + { + VM_BUG_ON(map != object_map); +- spin_unlock(&object_map_lock); ++ raw_spin_unlock(&object_map_lock); + } + + static inline unsigned int size_from_object(struct kmem_cache *s) diff --git a/kernel/patches-5.11.x-rt/0122-0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch b/kernel/patches-5.11.x-rt/0122-0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch new file mode 100644 index 000000000..a89e89f12 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0122-0003-mm-slub-Enable-irqs-for-__GFP_WAIT.patch @@ -0,0 +1,70 @@ +From: Thomas Gleixner +Date: Wed, 9 Jan 2013 12:08:15 +0100 +Subject: [PATCH 3/8] mm: slub: Enable irqs for __GFP_WAIT + +SYSTEM_RUNNING might be too late for enabling interrupts. Allocations +with GFP_WAIT can happen before that. So use this as an indicator. + +[bigeasy: Add warning on RT for allocations in atomic context. + Don't enable interrupts on allocations during SYSTEM_SUSPEND. This is done + during suspend by ACPI, noticed by Liwei Song +] + +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/slub.c | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1739,10 +1739,18 @@ static struct page *allocate_slab(struct + void *start, *p, *next; + int idx; + bool shuffle; ++ bool enableirqs = false; + + flags &= gfp_allowed_mask; + + if (gfpflags_allow_blocking(flags)) ++ enableirqs = true; ++ ++#ifdef CONFIG_PREEMPT_RT ++ if (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND) ++ enableirqs = true; ++#endif ++ if (enableirqs) + local_irq_enable(); + + flags |= s->allocflags; +@@ -1803,7 +1811,7 @@ static struct page *allocate_slab(struct + page->frozen = 1; + + out: +- if (gfpflags_allow_blocking(flags)) ++ if (enableirqs) + local_irq_disable(); + if (!page) + return NULL; +@@ -2814,6 +2822,10 @@ static __always_inline void *slab_alloc_ + unsigned long tid; + struct obj_cgroup *objcg = NULL; + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) ++ WARN_ON_ONCE(!preemptible() && ++ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)); ++ + s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags); + if (!s) + return NULL; +@@ -3279,6 +3291,10 @@ int kmem_cache_alloc_bulk(struct kmem_ca + int i; + struct obj_cgroup *objcg = NULL; + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) ++ WARN_ON_ONCE(!preemptible() && ++ (system_state > SYSTEM_BOOTING && system_state < SYSTEM_SUSPEND)); ++ + /* memcg and kmem_cache debug support */ + s = slab_pre_alloc_hook(s, &objcg, size, flags); + if (unlikely(!s)) diff --git a/kernel/patches-5.11.x-rt/0123-0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch b/kernel/patches-5.11.x-rt/0123-0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch new file mode 100644 index 000000000..bc2aaca0f --- /dev/null +++ b/kernel/patches-5.11.x-rt/0123-0004-mm-slub-Move-discard_slab-invocations-out-of-IRQ-off.patch @@ -0,0 +1,410 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 26 Feb 2021 15:14:15 +0100 +Subject: [PATCH 4/8] mm: slub: Move discard_slab() invocations out of IRQ-off + sections + +discard_slab() gives the memory back to the 
page-allocator. Some of its +invocations occur from IRQ-disabled sections which were disabled by SLUB. +An example is the deactivate_slab() invocation from within +___slab_alloc() or put_cpu_partial(). + +Instead of giving the memory back directly, put the pages on a list and +process it once the caller is out of the known IRQ-off region. + +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/slub.c | 114 ++++++++++++++++++++++++++++++++++++++++++-------------------- + 1 file changed, 78 insertions(+), 36 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -1869,12 +1869,29 @@ static void free_slab(struct kmem_cache + __free_slab(s, page); + } + ++static void discard_slab_delayed(struct kmem_cache *s, struct page *page, ++ struct list_head *delayed_free) ++{ ++ dec_slabs_node(s, page_to_nid(page), page->objects); ++ list_add(&page->lru, delayed_free); ++} ++ + static void discard_slab(struct kmem_cache *s, struct page *page) + { + dec_slabs_node(s, page_to_nid(page), page->objects); + free_slab(s, page); + } + ++static void discard_delayed(struct list_head *l) ++{ ++ while (!list_empty(l)) { ++ struct page *page = list_first_entry(l, struct page, lru); ++ ++ list_del(&page->lru); ++ __free_slab(page->slab_cache, page); ++ } ++} ++ + /* + * Management of partially allocated slabs. + */ +@@ -1948,15 +1965,16 @@ static inline void *acquire_slab(struct + WARN_ON(!freelist); + return freelist; + } +- +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free); + static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); + + /* + * Try to allocate a partial slab from a specific node. + */ + static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +- struct kmem_cache_cpu *c, gfp_t flags) ++ struct kmem_cache_cpu *c, gfp_t flags, ++ struct list_head *delayed_free) + { + struct page *page, *page2; + void *object = NULL; +@@ -1989,7 +2007,7 @@ static void *get_partial_node(struct kme + stat(s, ALLOC_FROM_PARTIAL); + object = t; + } else { +- put_cpu_partial(s, page, 0); ++ put_cpu_partial(s, page, 0, delayed_free); + stat(s, CPU_PARTIAL_NODE); + } + if (!kmem_cache_has_cpu_partial(s) +@@ -2005,7 +2023,8 @@ static void *get_partial_node(struct kme + * Get a page from somewhere. Search in increasing NUMA distances. + */ + static void *get_any_partial(struct kmem_cache *s, gfp_t flags, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_NUMA + struct zonelist *zonelist; +@@ -2047,7 +2066,7 @@ static void *get_any_partial(struct kmem + + if (n && cpuset_zone_allowed(zone, flags) && + n->nr_partial > s->min_partial) { +- object = get_partial_node(s, n, c, flags); ++ object = get_partial_node(s, n, c, flags, delayed_free); + if (object) { + /* + * Don't check read_mems_allowed_retry() +@@ -2069,7 +2088,8 @@ static void *get_any_partial(struct kmem + * Get a partial page, lock it and return it.
+ */ + static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, +- struct kmem_cache_cpu *c) ++ struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + void *object; + int searchnode = node; +@@ -2077,11 +2097,12 @@ static void *get_partial(struct kmem_cac + if (node == NUMA_NO_NODE) + searchnode = numa_mem_id(); + +- object = get_partial_node(s, get_node(s, searchnode), c, flags); ++ object = get_partial_node(s, get_node(s, searchnode), c, flags, ++ delayed_free); + if (object || node != NUMA_NO_NODE) + return object; + +- return get_any_partial(s, flags, c); ++ return get_any_partial(s, flags, c, delayed_free); + } + + #ifdef CONFIG_PREEMPTION +@@ -2157,7 +2178,8 @@ static void init_kmem_cache_cpus(struct + * Remove the cpu slab + */ + static void deactivate_slab(struct kmem_cache *s, struct page *page, +- void *freelist, struct kmem_cache_cpu *c) ++ void *freelist, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE }; + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); +@@ -2294,7 +2316,7 @@ static void deactivate_slab(struct kmem_ + stat(s, DEACTIVATE_FULL); + else if (m == M_FREE) { + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + +@@ -2309,8 +2331,8 @@ static void deactivate_slab(struct kmem_ + * for the cpu using c (or some other guarantee must be there + * to guarantee no concurrent accesses). + */ +-static void unfreeze_partials(struct kmem_cache *s, +- struct kmem_cache_cpu *c) ++static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct kmem_cache_node *n = NULL, *n2 = NULL; +@@ -2364,7 +2386,7 @@ static void unfreeze_partials(struct kme + discard_page = discard_page->next; + + stat(s, DEACTIVATE_EMPTY); +- discard_slab(s, page); ++ discard_slab_delayed(s, page, delayed_free); + stat(s, FREE_SLAB); + } + #endif /* CONFIG_SLUB_CPU_PARTIAL */ +@@ -2377,7 +2399,8 @@ static void unfreeze_partials(struct kme + * If we did not find a slot then simply move all the partials to the + * per node partial list. + */ +-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) ++static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain, ++ struct list_head *delayed_free) + { + #ifdef CONFIG_SLUB_CPU_PARTIAL + struct page *oldpage; +@@ -2400,7 +2423,8 @@ static void put_cpu_partial(struct kmem_ + * set to the per node partial list. 
+ */ + local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), ++ delayed_free); + local_irq_restore(flags); + oldpage = NULL; + pobjects = 0; +@@ -2422,17 +2446,18 @@ static void put_cpu_partial(struct kmem_ + unsigned long flags; + + local_irq_save(flags); +- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); ++ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free); + local_irq_restore(flags); + } + preempt_enable(); + #endif /* CONFIG_SLUB_CPU_PARTIAL */ + } + +-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) ++static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + stat(s, CPUSLAB_FLUSH); +- deactivate_slab(s, c->page, c->freelist, c); ++ deactivate_slab(s, c->page, c->freelist, c, delayed_free); + + c->tid = next_tid(c->tid); + } +@@ -2442,21 +2467,24 @@ static inline void flush_slab(struct kme + * + * Called from IPI handler with interrupts disabled. + */ +-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) ++static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu, ++ struct list_head *delayed_free) + { + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + +- unfreeze_partials(s, c); ++ unfreeze_partials(s, c, delayed_free); + } + + static void flush_cpu_slab(void *d) + { + struct kmem_cache *s = d; ++ LIST_HEAD(delayed_free); + +- __flush_cpu_slab(s, smp_processor_id()); ++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ discard_delayed(&delayed_free); + } + + static bool has_cpu_slab(int cpu, void *info) +@@ -2480,13 +2508,15 @@ static int slub_cpu_dead(unsigned int cp + { + struct kmem_cache *s; + unsigned long flags; ++ LIST_HEAD(delayed_free); + + mutex_lock(&slab_mutex); + list_for_each_entry(s, &slab_caches, list) { + local_irq_save(flags); +- __flush_cpu_slab(s, cpu); ++ __flush_cpu_slab(s, cpu, &delayed_free); + local_irq_restore(flags); + } ++ discard_delayed(&delayed_free); + mutex_unlock(&slab_mutex); + return 0; + } +@@ -2570,7 +2600,8 @@ slab_out_of_memory(struct kmem_cache *s, + } + + static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, +- int node, struct kmem_cache_cpu **pc) ++ int node, struct kmem_cache_cpu **pc, ++ struct list_head *delayed_free) + { + void *freelist; + struct kmem_cache_cpu *c = *pc; +@@ -2578,7 +2609,7 @@ static inline void *new_slab_objects(str + + WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO)); + +- freelist = get_partial(s, flags, node, c); ++ freelist = get_partial(s, flags, node, c, delayed_free); + + if (freelist) + return freelist; +@@ -2587,7 +2618,7 @@ static inline void *new_slab_objects(str + if (page) { + c = raw_cpu_ptr(s->cpu_slab); + if (c->page) +- flush_slab(s, c); ++ flush_slab(s, c, delayed_free); + + /* + * No other reference to the page yet so we can +@@ -2666,7 +2697,8 @@ static inline void *get_freelist(struct + * already disabled (which is the case for bulk allocation). 
+ */ + static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +- unsigned long addr, struct kmem_cache_cpu *c) ++ unsigned long addr, struct kmem_cache_cpu *c, ++ struct list_head *delayed_free) + { + void *freelist; + struct page *page; +@@ -2696,7 +2728,7 @@ static void *___slab_alloc(struct kmem_c + goto redo; + } else { + stat(s, ALLOC_NODE_MISMATCH); +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } + } +@@ -2707,7 +2739,7 @@ static void *___slab_alloc(struct kmem_c + * information when the page leaves the per-cpu allocator + */ + if (unlikely(!pfmemalloc_match(page, gfpflags))) { +- deactivate_slab(s, page, c->freelist, c); ++ deactivate_slab(s, page, c->freelist, c, delayed_free); + goto new_slab; + } + +@@ -2746,7 +2778,7 @@ static void *___slab_alloc(struct kmem_c + goto redo; + } + +- freelist = new_slab_objects(s, gfpflags, node, &c); ++ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free); + + if (unlikely(!freelist)) { + slab_out_of_memory(s, gfpflags, node); +@@ -2762,7 +2794,7 @@ static void *___slab_alloc(struct kmem_c + !alloc_debug_processing(s, page, freelist, addr)) + goto new_slab; /* Slab failed checks. Next slab needed */ + +- deactivate_slab(s, page, get_freepointer(s, freelist), c); ++ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free); + return freelist; + } + +@@ -2775,6 +2807,7 @@ static void *__slab_alloc(struct kmem_ca + { + void *p; + unsigned long flags; ++ LIST_HEAD(delayed_free); + + local_irq_save(flags); + #ifdef CONFIG_PREEMPTION +@@ -2786,8 +2819,9 @@ static void *__slab_alloc(struct kmem_ca + c = this_cpu_ptr(s->cpu_slab); + #endif + +- p = ___slab_alloc(s, gfpflags, node, addr, c); ++ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free); + local_irq_restore(flags); ++ discard_delayed(&delayed_free); + return p; + } + +@@ -3042,11 +3076,13 @@ static void __slab_free(struct kmem_cach + */ + stat(s, FREE_FROZEN); + } else if (new.frozen) { ++ LIST_HEAD(delayed_free); + /* + * If we just froze the page then put it onto the + * per cpu partial list. 
+ */ +- put_cpu_partial(s, page, 1); ++ put_cpu_partial(s, page, 1, &delayed_free); ++ discard_delayed(&delayed_free); + stat(s, CPU_PARTIAL_FREE); + } + +@@ -3290,6 +3326,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + struct kmem_cache_cpu *c; + int i; + struct obj_cgroup *objcg = NULL; ++ LIST_HEAD(delayed_free); + + if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP)) + WARN_ON_ONCE(!preemptible() && +@@ -3325,7 +3362,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + * of re-populating per CPU c->freelist + */ + p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, +- _RET_IP_, c); ++ _RET_IP_, c, &delayed_free); + if (unlikely(!p[i])) + goto error; + +@@ -3341,6 +3378,8 @@ int kmem_cache_alloc_bulk(struct kmem_ca + c->tid = next_tid(c->tid); + local_irq_enable(); + ++ discard_delayed(&delayed_free); ++ + /* Clear memory outside IRQ disabled fastpath loop */ + if (unlikely(slab_want_init_on_alloc(flags, s))) { + int j; +@@ -3354,6 +3393,7 @@ int kmem_cache_alloc_bulk(struct kmem_ca + return i; + error: + local_irq_enable(); ++ discard_delayed(&delayed_free); + slab_post_alloc_hook(s, objcg, flags, i, p); + __kmem_cache_free_bulk(s, i, p); + return 0; +@@ -4363,6 +4403,7 @@ static struct kmem_cache * __init bootst + int node; + struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); + struct kmem_cache_node *n; ++ LIST_HEAD(delayed_free); + + memcpy(s, static_cache, kmem_cache->object_size); + +@@ -4371,7 +4412,8 @@ static struct kmem_cache * __init bootst + * up. Even if it weren't true, IRQs are not up so we couldn't fire + * IPIs around. + */ +- __flush_cpu_slab(s, smp_processor_id()); ++ __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ discard_delayed(&delayed_free); + for_each_kmem_cache_node(s, node, n) { + struct page *p; + diff --git a/kernel/patches-5.11.x-rt/0124-0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch b/kernel/patches-5.11.x-rt/0124-0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch new file mode 100644 index 000000000..ee3c56338 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0124-0005-mm-slub-Move-flush_cpu_slab-invocations-__free_slab-.patch @@ -0,0 +1,114 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 26 Feb 2021 17:11:55 +0100 +Subject: [PATCH 5/8] mm: slub: Move flush_cpu_slab() invocations __free_slab() + invocations out of IRQ context + +flush_all() flushes a specific SLAB cache on each CPU (where the cache +is present). The discard_delayed()/__free_slab() invocation happens +within an IPI handler and is problematic for PREEMPT_RT. + +The flush operation is not a frequent operation or a hot path. The +per-CPU flush operation can be moved to within a workqueue.
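To spell out the pattern the patch below adopts: the on_each_cpu_cond() IPI broadcast is replaced by one work item per CPU, queued with schedule_work_on() and waited on with flush_work(), serialized by a mutex so only one flusher runs at a time. A minimal standalone sketch of that pattern (identifiers prefixed example_ are illustrative, not taken from the patch):

#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

/*
 * Illustrative sketch, not from the patch: flush per-CPU state via one
 * work item per CPU instead of an IPI, so the handler runs in process
 * context and may take sleeping locks on PREEMPT_RT.
 */
static DEFINE_MUTEX(example_flush_lock);
static DEFINE_PER_CPU(struct work_struct, example_flush_work);

static void example_flush_fn(struct work_struct *w)
{
	/* Runs on its CPU in process context; sleeping locks are fine. */
}

static void example_flush_all(void)
{
	unsigned int cpu;

	cpus_read_lock();			/* keep the CPU mask stable */
	mutex_lock(&example_flush_lock);	/* one flusher at a time */
	for_each_online_cpu(cpu) {
		struct work_struct *w = &per_cpu(example_flush_work, cpu);

		INIT_WORK(w, example_flush_fn);
		schedule_work_on(cpu, w);	/* queue on that CPU */
	}
	for_each_online_cpu(cpu)
		flush_work(&per_cpu(example_flush_work, cpu));	/* wait */
	mutex_unlock(&example_flush_lock);
	cpus_read_unlock();
}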
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/slub.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 52 insertions(+), 8 deletions(-) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -2478,26 +2478,70 @@ static inline void __flush_cpu_slab(stru + unfreeze_partials(s, c, delayed_free); + } + +-static void flush_cpu_slab(void *d) ++struct slub_flush_work { ++ struct work_struct work; ++ struct kmem_cache *s; ++ bool skip; ++}; ++ ++static void flush_cpu_slab(struct work_struct *w) + { +- struct kmem_cache *s = d; ++ struct slub_flush_work *sfw; + LIST_HEAD(delayed_free); + +- __flush_cpu_slab(s, smp_processor_id(), &delayed_free); ++ sfw = container_of(w, struct slub_flush_work, work); ++ ++ local_irq_disable(); ++ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free); ++ local_irq_enable(); ++ + discard_delayed(&delayed_free); + } + +-static bool has_cpu_slab(int cpu, void *info) ++static bool has_cpu_slab(int cpu, struct kmem_cache *s) + { +- struct kmem_cache *s = info; + struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + + return c->page || slub_percpu_partial(c); + } + ++static DEFINE_MUTEX(flush_lock); ++static DEFINE_PER_CPU(struct slub_flush_work, slub_flush); ++ ++static void flush_all_locked(struct kmem_cache *s) ++{ ++ struct slub_flush_work *sfw; ++ unsigned int cpu; ++ ++ mutex_lock(&flush_lock); ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ if (!has_cpu_slab(cpu, s)) { ++ sfw->skip = true; ++ continue; ++ } ++ INIT_WORK(&sfw->work, flush_cpu_slab); ++ sfw->skip = false; ++ sfw->s = s; ++ schedule_work_on(cpu, &sfw->work); ++ } ++ ++ for_each_online_cpu(cpu) { ++ sfw = &per_cpu(slub_flush, cpu); ++ if (sfw->skip) ++ continue; ++ flush_work(&sfw->work); ++ } ++ ++ mutex_unlock(&flush_lock); ++} ++ + static void flush_all(struct kmem_cache *s) + { +- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); ++ cpus_read_lock(); ++ flush_all_locked(s); ++ cpus_read_unlock(); + } + + /* +@@ -3979,7 +4023,7 @@ int __kmem_cache_shutdown(struct kmem_ca + int node; + struct kmem_cache_node *n; + +- flush_all(s); ++ flush_all_locked(s); + /* Attempt to free all objects */ + for_each_kmem_cache_node(s, node, n) { + free_partial(s, n); +@@ -4219,7 +4263,7 @@ int __kmem_cache_shrink(struct kmem_cach + unsigned long flags; + int ret = 0; + +- flush_all(s); ++ flush_all_locked(s); + for_each_kmem_cache_node(s, node, n) { + INIT_LIST_HEAD(&discard); + for (i = 0; i < SHRINK_PROMOTE_MAX; i++) diff --git a/kernel/patches-5.11.x-rt/0125-0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch b/kernel/patches-5.11.x-rt/0125-0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch new file mode 100644 index 000000000..a65073370 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0125-0006-mm-slub-Don-t-resize-the-location-tracking-cache-on-.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 26 Feb 2021 17:26:04 +0100 +Subject: [PATCH 6/8] mm: slub: Don't resize the location tracking cache on + PREEMPT_RT + +The location tracking cache has a size of a page and is resized if its +current size is too small. +This allocation happens with interrupts disabled, which is not possible on +PREEMPT_RT. +Should one page be too small, then we have to allocate more at the +beginning. The only downside is that fewer callers will be visible.
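The rule the patch below applies boils down to: on PREEMPT_RT, refuse to grow the buffer from a context that cannot sleep and keep the initial allocation instead. A sketch of such a guard (hypothetical helper; note the patch itself tests flags == GFP_ATOMIC rather than gfpflags_allow_blocking()):

#include <linux/gfp.h>
#include <linux/kconfig.h>

/*
 * Hypothetical helper mirroring the guard added below: on PREEMPT_RT,
 * refuse to grow a buffer from a context that must not sleep and let
 * the caller keep the initially allocated (page-sized) buffer.
 */
static void *resize_buf_if_allowed(gfp_t flags, unsigned int order)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !gfpflags_allow_blocking(flags))
		return NULL;	/* caller falls back to the smaller buffer */

	return (void *)__get_free_pages(flags, order);
}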
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/slub.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -4737,6 +4737,9 @@ static int alloc_loc_track(struct loc_tr + struct location *l; + int order; + ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC) ++ return 0; ++ + order = get_order(sizeof(struct location) * max); + + l = (void *)__get_free_pages(flags, order); diff --git a/kernel/patches-5.11.x-rt/0126-0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch b/kernel/patches-5.11.x-rt/0126-0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch new file mode 100644 index 000000000..378ea2257 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0126-0007-mm-page_alloc-Use-migrate_disable-in-drain_local_pag.patch @@ -0,0 +1,32 @@ +From: Sebastian Andrzej Siewior +Date: Thu, 2 Jul 2020 14:27:23 +0200 +Subject: [PATCH 7/8] mm: page_alloc: Use migrate_disable() in + drain_local_pages_wq() + +drain_local_pages_wq() disables preemption to avoid CPU migration during +CPU hotplug and can't use cpus_read_lock(). + +Using migrate_disable() works here, too. The scheduler won't take the +CPU offline until the task has left the migrate-disable section. + +Use migrate_disable() in drain_local_pages_wq(). + +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/page_alloc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3033,9 +3033,9 @@ static void drain_local_pages_wq(struct + * cpu which is allright but we also have to make sure to not move to + * a different one. + */ +- preempt_disable(); ++ migrate_disable(); + drain_local_pages(drain->zone); +- preempt_enable(); ++ migrate_enable(); + } + + /* diff --git a/kernel/patches-5.11.x-rt/0127-0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch b/kernel/patches-5.11.x-rt/0127-0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch new file mode 100644 index 000000000..085507793 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0127-0008-mm-page_alloc-Use-a-local_lock-instead-of-explicit-l.patch @@ -0,0 +1,204 @@ +From: Ingo Molnar +Date: Fri, 3 Jul 2009 08:29:37 -0500 +Subject: [PATCH 8/8] mm: page_alloc: Use a local_lock instead of explicit + local_irq_save(). + +The page-allocator disables interrupts for a few reasons: +- Decouple the irqsave operation from spin_lock() so it can be + extended over the actual lock region and cover other areas, such as + counter increments where the preemptible version can be avoided. + +- Access to the per-CPU pcp from struct zone. + +Replace the irqsave with a local-lock. The counters are expected to be +always modified with preemption disabled and with no access from interrupt +context.
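The local_lock API used in the patch below (introduced around v5.8) is the stock replacement for open-coded local_irq_save() around per-CPU data: on !RT it compiles to the same IRQ-off section plus lockdep annotations, while on RT it becomes a per-CPU sleeping lock. The pattern in isolation (names are illustrative, not from the patch):

#include <linux/local_lock.h>
#include <linux/percpu.h>

/* Illustrative names; mirrors the pa_lock conversion in the patch below. */
struct example_pcp {
	local_lock_t lock;	/* names what the IRQ-off region protects */
	unsigned long count;
};
static DEFINE_PER_CPU(struct example_pcp, example_pcp) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void example_update(void)
{
	unsigned long flags;

	/* !RT: local_irq_save() as before; RT: a per-CPU spinlock. */
	local_lock_irqsave(&example_pcp.lock, flags);
	this_cpu_inc(example_pcp.count);
	local_unlock_irqrestore(&example_pcp.lock, flags);
}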
+ +Contains fixes from: + Peter Zijlstra + Thomas Gleixner + +Signed-off-by: Ingo Molnar +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/page_alloc.c | 49 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -62,6 +62,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -363,6 +364,13 @@ EXPORT_SYMBOL(nr_online_nodes); + + int page_group_by_mobility_disabled __read_mostly; + ++struct pa_lock { ++ local_lock_t l; ++}; ++static DEFINE_PER_CPU(struct pa_lock, pa_lock) = { ++ .l = INIT_LOCAL_LOCK(l), ++}; ++ + #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT + /* + * During boot we initialize deferred pages on-demand, as needed, but once +@@ -1537,11 +1545,11 @@ static void __free_pages_ok(struct page + return; + + migratetype = get_pfnblock_migratetype(page, pfn); +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + __count_vm_events(PGFREE, 1 << order); + free_one_page(page_zone(page), page, pfn, order, migratetype, + fpi_flags); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + + void __free_pages_core(struct page *page, unsigned int order) +@@ -2957,12 +2965,12 @@ void drain_zone_pages(struct zone *zone, + unsigned long flags; + int to_drain, batch; + +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + batch = READ_ONCE(pcp->batch); + to_drain = min(pcp->count, batch); + if (to_drain > 0) + free_pcppages_bulk(zone, to_drain, pcp); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + #endif + +@@ -2979,13 +2987,13 @@ static void drain_pages_zone(unsigned in + struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; + +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + pset = per_cpu_ptr(zone->pageset, cpu); + + pcp = &pset->pcp; + if (pcp->count) + free_pcppages_bulk(zone, pcp->count, pcp); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + + /* +@@ -3248,9 +3256,9 @@ void free_unref_page(struct page *page) + if (!free_unref_page_prepare(page, pfn)) + return; + +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + free_unref_page_commit(page, pfn); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + + /* +@@ -3270,7 +3278,7 @@ void free_unref_page_list(struct list_he + set_page_private(page, pfn); + } + +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + list_for_each_entry_safe(page, next, list, lru) { + unsigned long pfn = page_private(page); + +@@ -3283,12 +3291,12 @@ void free_unref_page_list(struct list_he + * a large list of pages to free. 
+ */ + if (++batch_count == SWAP_CLUSTER_MAX) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + batch_count = 0; +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + } + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + + /* +@@ -3443,7 +3451,7 @@ static struct page *rmqueue_pcplist(stru + struct page *page; + unsigned long flags; + +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + pcp = &this_cpu_ptr(zone->pageset)->pcp; + list = &pcp->lists[migratetype]; + page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list); +@@ -3451,7 +3459,7 @@ static struct page *rmqueue_pcplist(stru + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1); + zone_statistics(preferred_zone, zone); + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + return page; + } + +@@ -3485,7 +3493,9 @@ struct page *rmqueue(struct zone *prefer + * allocate greater than order-1 page units with __GFP_NOFAIL. + */ + WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); +- spin_lock_irqsave(&zone->lock, flags); ++ ++ local_lock_irqsave(&pa_lock.l, flags); ++ spin_lock(&zone->lock); + + do { + page = NULL; +@@ -3506,12 +3516,13 @@ struct page *rmqueue(struct zone *prefer + spin_unlock(&zone->lock); + if (!page) + goto failed; ++ + __mod_zone_freepage_state(zone, -(1 << order), + get_pcppage_migratetype(page)); + + __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + zone_statistics(preferred_zone, zone); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + + out: + /* Separate test+clear to avoid unnecessary atomics */ +@@ -3524,7 +3535,7 @@ struct page *rmqueue(struct zone *prefer + return page; + + failed: +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + return NULL; + } + +@@ -8828,7 +8839,7 @@ void zone_pcp_reset(struct zone *zone) + struct per_cpu_pageset *pset; + + /* avoid races with drain_pages() */ +- local_irq_save(flags); ++ local_lock_irqsave(&pa_lock.l, flags); + if (zone->pageset != &boot_pageset) { + for_each_online_cpu(cpu) { + pset = per_cpu_ptr(zone->pageset, cpu); +@@ -8837,7 +8848,7 @@ void zone_pcp_reset(struct zone *zone) + free_percpu(zone->pageset); + zone->pageset = &boot_pageset; + } +- local_irq_restore(flags); ++ local_unlock_irqrestore(&pa_lock.l, flags); + } + + #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/kernel/patches-5.11.x-rt/0128-mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch b/kernel/patches-5.11.x-rt/0128-mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch new file mode 100644 index 000000000..843501536 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0128-mm-slub-Don-t-enable-partial-CPU-caches-on-PREEMPT_R.patch @@ -0,0 +1,26 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 2 Mar 2021 18:58:04 +0100 +Subject: [PATCH] mm: slub: Don't enable partial CPU caches on PREEMPT_RT by + default + +SLUB's partial CPU caches lead to higher latencies in a hackbench +benchmark. + +Don't enable partial CPU caches by default on PREEMPT_RT. + +Signed-off-by: Sebastian Andrzej Siewior +--- + init/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1983,7 +1983,7 @@ config SHUFFLE_PAGE_ALLOCATOR + Say Y if unsure. 
+ + config SLUB_CPU_PARTIAL +- default y ++ default y if !PREEMPT_RT + depends on SLUB && SMP + bool "SLUB per cpu partial cache" + help diff --git a/kernel/patches-5.11.x-rt/0129-mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch b/kernel/patches-5.11.x-rt/0129-mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch new file mode 100644 index 000000000..4e4769210 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0129-mm-memcontrol-Provide-a-local_lock-for-per-CPU-memcg.patch @@ -0,0 +1,136 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 18 Aug 2020 10:30:00 +0200 +Subject: [PATCH] mm: memcontrol: Provide a local_lock for per-CPU memcg_stock + +The interrupts are disabled to ensure CPU-local access to the per-CPU +variable `memcg_stock'. +As the code inside the interrupt disabled section acquires regular +spinlocks, which are converted to 'sleeping' spinlocks on a PREEMPT_RT +kernel, this conflicts with the RT semantics. + +Convert it to a local_lock which allows RT kernels to substitute it with +a real per-CPU lock. On non-RT kernels this maps to local_irq_save() as +before, but also provides lockdep coverage of the critical region. +No functional change. + +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/memcontrol.c | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -2234,6 +2234,7 @@ void unlock_page_memcg(struct page *page + EXPORT_SYMBOL(unlock_page_memcg); + + struct memcg_stock_pcp { ++ local_lock_t lock; + struct mem_cgroup *cached; /* this never be root cgroup */ + unsigned int nr_pages; + +@@ -2285,7 +2286,7 @@ static bool consume_stock(struct mem_cgr + if (nr_pages > MEMCG_CHARGE_BATCH) + return ret; + +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); + + stock = this_cpu_ptr(&memcg_stock); + if (memcg == stock->cached && stock->nr_pages >= nr_pages) { +@@ -2293,7 +2294,7 @@ static bool consume_stock(struct mem_cgr + ret = true; + } + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); + + return ret; + } +@@ -2328,14 +2329,14 @@ static void drain_local_stock(struct wor + * The only protection from memory hotplug vs.
drain_stock races is + * that we always operate on local CPU stock here with IRQ disabled + */ +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); + + stock = this_cpu_ptr(&memcg_stock); + drain_obj_stock(stock); + drain_stock(stock); + clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); + } + + /* +@@ -2347,7 +2348,7 @@ static void refill_stock(struct mem_cgro + struct memcg_stock_pcp *stock; + unsigned long flags; + +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); + + stock = this_cpu_ptr(&memcg_stock); + if (stock->cached != memcg) { /* reset if necessary */ +@@ -2360,7 +2361,7 @@ static void refill_stock(struct mem_cgro + if (stock->nr_pages > MEMCG_CHARGE_BATCH) + drain_stock(stock); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); + } + + /* +@@ -3167,7 +3168,7 @@ static bool consume_obj_stock(struct obj + unsigned long flags; + bool ret = false; + +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); + + stock = this_cpu_ptr(&memcg_stock); + if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) { +@@ -3175,7 +3176,7 @@ static bool consume_obj_stock(struct obj + ret = true; + } + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); + + return ret; + } +@@ -3234,7 +3235,7 @@ static void refill_obj_stock(struct obj_ + struct memcg_stock_pcp *stock; + unsigned long flags; + +- local_irq_save(flags); ++ local_lock_irqsave(&memcg_stock.lock, flags); + + stock = this_cpu_ptr(&memcg_stock); + if (stock->cached_objcg != objcg) { /* reset if necessary */ +@@ -3248,7 +3249,7 @@ static void refill_obj_stock(struct obj_ + if (stock->nr_bytes > PAGE_SIZE) + drain_obj_stock(stock); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&memcg_stock.lock, flags); + } + + int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) +@@ -7089,9 +7090,13 @@ static int __init mem_cgroup_init(void) + cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, + memcg_hotplug_cpu_dead); + +- for_each_possible_cpu(cpu) +- INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, +- drain_local_stock); ++ for_each_possible_cpu(cpu) { ++ struct memcg_stock_pcp *stock; ++ ++ stock = per_cpu_ptr(&memcg_stock, cpu); ++ INIT_WORK(&stock->work, drain_local_stock); ++ local_lock_init(&stock->lock); ++ } + + for_each_node(node) { + struct mem_cgroup_tree_per_node *rtpn; diff --git a/kernel/patches-5.4.x-rt/0126-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch b/kernel/patches-5.11.x-rt/0130-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch similarity index 95% rename from kernel/patches-5.4.x-rt/0126-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch rename to kernel/patches-5.11.x-rt/0130-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch index 6b2492ffb..fe94964b4 100644 --- a/kernel/patches-5.4.x-rt/0126-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch +++ b/kernel/patches-5.11.x-rt/0130-mm-memcontrol-Don-t-call-schedule_work_on-in-preempt.patch @@ -48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/mm/memcontrol.c +++ b/mm/memcontrol.c -@@ -2267,7 +2267,7 @@ static void drain_all_stock(struct mem_c +@@ -2381,7 +2381,7 @@ static void drain_all_stock(struct mem_c * as well as workers from this path always operate on the local * per-cpu data. CPU up doesn't touch memcg_stock at all. 
*/ @@ -57,7 +57,7 @@ Signed-off-by: Sebastian Andrzej Siewior for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -2288,7 +2288,7 @@ static void drain_all_stock(struct mem_c +@@ -2404,7 +2404,7 @@ static void drain_all_stock(struct mem_c schedule_work_on(cpu, &stock->work); } } diff --git a/kernel/patches-5.11.x-rt/0131-mm-memcontrol-do_not_disable_irq.patch b/kernel/patches-5.11.x-rt/0131-mm-memcontrol-do_not_disable_irq.patch new file mode 100644 index 000000000..cf9653b39 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0131-mm-memcontrol-do_not_disable_irq.patch @@ -0,0 +1,115 @@ +From: Sebastian Andrzej Siewior +Subject: mm/memcontrol: Replace local_irq_disable with local locks +Date: Wed, 28 Jan 2015 17:14:16 +0100 + +There are a few local_irq_disable() which then take sleeping locks. This +patch converts them to local locks. + +[bigeasy: Move unlock after memcg_check_events() in mem_cgroup_swapout(), + pointed out by Matt Fleming ] +Signed-off-by: Sebastian Andrzej Siewior +--- + mm/memcontrol.c | 29 +++++++++++++++++++++-------- + 1 file changed, 21 insertions(+), 8 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -66,6 +66,7 @@ + #include + #include + #include "slab.h" ++#include + + #include + +@@ -96,6 +97,13 @@ bool cgroup_memory_noswap __read_mostly; + static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); + #endif + ++struct event_lock { ++ local_lock_t l; ++}; ++static DEFINE_PER_CPU(struct event_lock, event_lock) = { ++ .l = INIT_LOCAL_LOCK(l), ++}; ++ + /* Whether legacy memory+swap accounting is active */ + static bool do_memsw_account(void) + { +@@ -5696,12 +5704,12 @@ static int mem_cgroup_move_account(struc + + ret = 0; + +- local_irq_disable(); ++ local_lock_irq(&event_lock.l); + mem_cgroup_charge_statistics(to, page, nr_pages); + memcg_check_events(to, page); + mem_cgroup_charge_statistics(from, page, -nr_pages); + memcg_check_events(from, page); +- local_irq_enable(); ++ local_unlock_irq(&event_lock.l); + out_unlock: + unlock_page(page); + out: +@@ -6752,10 +6760,10 @@ int mem_cgroup_charge(struct page *page, + css_get(&memcg->css); + commit_charge(page, memcg); + +- local_irq_disable(); ++ local_lock_irq(&event_lock.l); + mem_cgroup_charge_statistics(memcg, page, nr_pages); + memcg_check_events(memcg, page); +- local_irq_enable(); ++ local_unlock_irq(&event_lock.l); + + /* + * Cgroup1's unified memory+swap counter has been charged with the +@@ -6811,11 +6819,11 @@ static void uncharge_batch(const struct + memcg_oom_recover(ug->memcg); + } + +- local_irq_save(flags); ++ local_lock_irqsave(&event_lock.l, flags); + __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); + __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages); + memcg_check_events(ug->memcg, ug->dummy_page); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&event_lock.l, flags); + + /* drop reference from uncharge_page */ + css_put(&ug->memcg->css); +@@ -6967,10 +6975,10 @@ void mem_cgroup_migrate(struct page *old + css_get(&memcg->css); + commit_charge(newpage, memcg); + +- local_irq_save(flags); ++ local_lock_irqsave(&event_lock.l, flags); + mem_cgroup_charge_statistics(memcg, newpage, nr_pages); + memcg_check_events(memcg, newpage); +- local_irq_restore(flags); ++ local_unlock_irqrestore(&event_lock.l, flags); + } + + DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); +@@ -7145,6 +7153,7 @@ void mem_cgroup_swapout(struct page *pag + struct mem_cgroup *memcg, *swap_memcg; + unsigned int nr_entries; +
unsigned short oldid; ++ unsigned long flags; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); +@@ -7193,9 +7202,13 @@ void mem_cgroup_swapout(struct page *pag + * important here to have the interrupts disabled because it is the + * only synchronisation we have for updating the per-CPU variables. + */ ++ local_lock_irqsave(&event_lock.l, flags); ++#ifndef CONFIG_PREEMPT_RT + VM_BUG_ON(!irqs_disabled()); ++#endif + mem_cgroup_charge_statistics(memcg, page, -nr_entries); + memcg_check_events(memcg, page); ++ local_unlock_irqrestore(&event_lock.l, flags); + + css_put(&memcg->css); + } diff --git a/kernel/patches-5.4.x-rt/0129-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch b/kernel/patches-5.11.x-rt/0132-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch similarity index 75% rename from kernel/patches-5.4.x-rt/0129-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch rename to kernel/patches-5.11.x-rt/0132-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch index dace00b89..0fd7ef8de 100644 --- a/kernel/patches-5.4.x-rt/0129-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch +++ b/kernel/patches-5.11.x-rt/0132-mm_zsmalloc_copy_with_get_cpu_var_and_locking.patch @@ -12,8 +12,8 @@ Signed-off-by: Mike Galbraith fixed the size magic] Signed-off-by: Sebastian Andrzej Siewior --- - mm/zsmalloc.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- - 1 file changed, 74 insertions(+), 6 deletions(-) + mm/zsmalloc.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 79 insertions(+), 6 deletions(-) --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -21,15 +21,12 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -+#include ++#include #define ZSPAGE_MAGIC 0x58 -@@ -74,9 +75,22 @@ - */ - #define ZS_MAX_ZSPAGE_ORDER 2 - #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) -- +@@ -77,6 +78,20 @@ + #define ZS_HANDLE_SIZE (sizeof(unsigned long)) +#ifdef CONFIG_PREEMPT_RT @@ -48,8 +45,16 @@ Signed-off-by: Sebastian Andrzej Siewior + /* * Object location (, ) is encoded as - * as single (unsigned long) handle value. -@@ -326,7 +340,7 @@ static void SetZsPageMovable(struct zs_p + * a single (unsigned long) handle value. 
+@@ -293,6 +308,7 @@ struct zspage { + }; + + struct mapping_area { ++ local_lock_t lock; + char *vm_buf; /* copy buffer for objects that span pages */ + char *vm_addr; /* address of kmap_atomic()'ed pages */ + enum zs_mapmode vm_mm; /* mapping mode */ +@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_p static int create_cache(struct zs_pool *pool) { @@ -58,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior 0, 0, NULL); if (!pool->handle_cachep) return 1; -@@ -350,10 +364,27 @@ static void destroy_cache(struct zs_pool +@@ -346,9 +362,26 @@ static void destroy_cache(struct zs_pool static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) { @@ -76,19 +81,18 @@ Signed-off-by: Sebastian Andrzej Siewior + } +#endif + return (unsigned long)p; - } - ++} ++ +#ifdef CONFIG_PREEMPT_RT +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) +{ + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); -+} + } +#endif -+ + static void cache_free_handle(struct zs_pool *pool, unsigned long handle) { - kmem_cache_free(pool->handle_cachep, (void *)handle); -@@ -372,12 +403,18 @@ static void cache_free_zspage(struct zs_ +@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_ static void record_obj(unsigned long handle, unsigned long obj) { @@ -107,15 +111,19 @@ Signed-off-by: Sebastian Andrzej Siewior } /* zpool driver */ -@@ -460,6 +497,7 @@ MODULE_ALIAS("zpool-zsmalloc"); +@@ -455,7 +494,10 @@ MODULE_ALIAS("zpool-zsmalloc"); + #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ - static DEFINE_PER_CPU(struct mapping_area, zs_map_area); -+static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); +-static DEFINE_PER_CPU(struct mapping_area, zs_map_area); ++static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { ++ /* XXX remove this and use a spin_lock_t in pin_tag() */ ++ .lock = INIT_LOCAL_LOCK(lock), ++}; static bool is_zspage_isolated(struct zspage *zspage) { -@@ -869,7 +907,13 @@ static unsigned long location_to_obj(str +@@ -862,7 +904,13 @@ static unsigned long location_to_obj(str static unsigned long handle_to_obj(unsigned long handle) { @@ -129,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior } static unsigned long obj_to_head(struct page *page, void *obj) -@@ -883,22 +927,46 @@ static unsigned long obj_to_head(struct +@@ -876,22 +924,46 @@ static unsigned long obj_to_head(struct static inline int testpin_tag(unsigned long handle) { @@ -153,7 +161,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif } - static void pin_tag(unsigned long handle) + static void pin_tag(unsigned long handle) __acquires(bitlock) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); @@ -164,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior +#endif } - static void unpin_tag(unsigned long handle) + static void unpin_tag(unsigned long handle) __releases(bitlock) { +#ifdef CONFIG_PREEMPT_RT + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); @@ -176,21 +184,22 @@ Signed-off-by: Sebastian Andrzej Siewior } static void reset_page(struct page *page) -@@ -1324,7 +1392,7 @@ void *zs_map_object(struct zs_pool *pool +@@ -1275,7 +1347,8 @@ void *zs_map_object(struct zs_pool *pool class = pool->size_class[class_idx]; off = (class->size * obj_idx) & ~PAGE_MASK; - area = &get_cpu_var(zs_map_area); -+ area = &get_locked_var(zs_map_area_lock, zs_map_area); ++ local_lock(&zs_map_area.lock); ++ area = this_cpu_ptr(&zs_map_area); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is 
contained entirely within a page */ -@@ -1378,7 +1446,7 @@ void zs_unmap_object(struct zs_pool *poo +@@ -1329,7 +1402,7 @@ void zs_unmap_object(struct zs_pool *poo __zs_unmap_object(area, pages, off, class->size); } - put_cpu_var(zs_map_area); -+ put_locked_var(zs_map_area_lock, zs_map_area); ++ local_unlock(&zs_map_area.lock); migrate_read_unlock(zspage); unpin_tag(handle); diff --git a/kernel/patches-5.4.x-rt/0132-x86-kvm-require-const-tsc-for-rt.patch b/kernel/patches-5.11.x-rt/0133-x86-kvm-require-const-tsc-for-rt.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0132-x86-kvm-require-const-tsc-for-rt.patch rename to kernel/patches-5.11.x-rt/0133-x86-kvm-require-const-tsc-for-rt.patch index 07d05ad5e..e3e007961 100644 --- a/kernel/patches-5.4.x-rt/0132-x86-kvm-require-const-tsc-for-rt.patch +++ b/kernel/patches-5.11.x-rt/0133-x86-kvm-require-const-tsc-for-rt.patch @@ -14,7 +14,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -7257,6 +7257,14 @@ int kvm_arch_init(void *opaque) +@@ -7908,6 +7908,14 @@ int kvm_arch_init(void *opaque) goto out; } diff --git a/kernel/patches-5.4.x-rt/0134-wait.h-include-atomic.h.patch b/kernel/patches-5.11.x-rt/0134-wait.h-include-atomic.h.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0134-wait.h-include-atomic.h.patch rename to kernel/patches-5.11.x-rt/0134-wait.h-include-atomic.h.patch diff --git a/kernel/patches-5.4.x-rt/0140-sched-limit-nr-migrate.patch b/kernel/patches-5.11.x-rt/0135-sched-limit-nr-migrate.patch similarity index 92% rename from kernel/patches-5.4.x-rt/0140-sched-limit-nr-migrate.patch rename to kernel/patches-5.11.x-rt/0135-sched-limit-nr-migrate.patch index 801d10a95..3d0c9aeca 100644 --- a/kernel/patches-5.4.x-rt/0140-sched-limit-nr-migrate.patch +++ b/kernel/patches-5.11.x-rt/0135-sched-limit-nr-migrate.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -56,7 +56,11 @@ const_debug unsigned int sysctl_sched_fe +@@ -64,7 +64,11 @@ const_debug unsigned int sysctl_sched_fe * Number of tasks to iterate in a single balance run. * Limited because this is done with IRQs disabled. 
*/ diff --git a/kernel/patches-5.4.x-rt/0141-sched-mmdrop-delayed.patch b/kernel/patches-5.11.x-rt/0136-sched-mmdrop-delayed.patch similarity index 66% rename from kernel/patches-5.4.x-rt/0141-sched-mmdrop-delayed.patch rename to kernel/patches-5.11.x-rt/0136-sched-mmdrop-delayed.patch index 3614fc617..af93b6295 100644 --- a/kernel/patches-5.4.x-rt/0141-sched-mmdrop-delayed.patch +++ b/kernel/patches-5.11.x-rt/0136-sched-mmdrop-delayed.patch @@ -10,8 +10,8 @@ Signed-off-by: Thomas Gleixner include/linux/mm_types.h | 4 ++++ include/linux/sched/mm.h | 11 +++++++++++ kernel/fork.c | 13 +++++++++++++ - kernel/sched/core.c | 18 ++++++++++++++++-- - 4 files changed, 44 insertions(+), 2 deletions(-) + kernel/sched/core.c | 7 ++++++- + 4 files changed, 34 insertions(+), 1 deletion(-) --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -22,8 +22,8 @@ Signed-off-by: Thomas Gleixner +#include #include #include - -@@ -520,6 +521,9 @@ struct mm_struct { + #include +@@ -553,6 +554,9 @@ struct mm_struct { bool tlb_flush_batched; #endif struct uprobes_state uprobes_state; @@ -47,15 +47,15 @@ Signed-off-by: Thomas Gleixner + call_rcu(&mm->delayed_drop, __mmdrop_delayed); +} +#else -+# define mmdrop_delayed(mm) mmdrop(mm) ++# define mmdrop_delayed(mm) mmdrop(mm) +#endif + - /* - * This has to be called after a get_task_mm()/mmget_not_zero() - * followed by taking the mmap_sem for writing before modifying the + /** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -696,6 +696,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -689,6 +689,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -77,7 +77,7 @@ Signed-off-by: Thomas Gleixner struct mm_struct *mm; --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3242,9 +3242,13 @@ static struct rq *finish_task_switch(str +@@ -4272,9 +4272,13 @@ static struct rq *finish_task_switch(str * provided by mmdrop(), * - a sync_core for SYNC_CORE. */ @@ -92,36 +92,11 @@ Signed-off-by: Thomas Gleixner } if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) -@@ -6178,6 +6182,8 @@ void sched_setnuma(struct task_struct *p +@@ -7291,6 +7295,7 @@ void sched_setnuma(struct task_struct *p #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU -+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); + /* * Ensure that the idle task is using init_mm right before its CPU goes * offline. -@@ -6193,7 +6199,11 @@ void idle_task_exit(void) - current->active_mm = &init_mm; - finish_arch_post_lock_switch(); - } -- mmdrop(mm); -+ /* -+ * Defer the cleanup to an alive cpu. On RT we can neither -+ * call mmdrop() nor mmdrop_delayed() from here. 
-+ */ -+ per_cpu(idle_last_mm, smp_processor_id()) = mm; - } - - /* -@@ -6499,6 +6509,10 @@ int sched_cpu_dying(unsigned int cpu) - update_max_interval(); - nohz_balance_exit_idle(rq); - hrtick_clear(rq); -+ if (per_cpu(idle_last_mm, cpu)) { -+ mmdrop_delayed(per_cpu(idle_last_mm, cpu)); -+ per_cpu(idle_last_mm, cpu) = NULL; -+ } - return 0; - } - #endif diff --git a/kernel/patches-5.4.x-rt/0142-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch b/kernel/patches-5.11.x-rt/0137-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch similarity index 87% rename from kernel/patches-5.4.x-rt/0142-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch rename to kernel/patches-5.11.x-rt/0137-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch index 1b9330f73..f3af65395 100644 --- a/kernel/patches-5.4.x-rt/0142-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch +++ b/kernel/patches-5.11.x-rt/0137-kernel-sched-move-stack-kprobe-clean-up-to-__put_tas.patch @@ -19,15 +19,15 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -43,6 +43,7 @@ - #include +@@ -42,6 +42,7 @@ + #include #include #include +#include #include #include #include -@@ -289,7 +290,7 @@ static inline void free_thread_stack(str +@@ -288,7 +289,7 @@ static inline void free_thread_stack(str return; } @@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior return; } #endif -@@ -750,6 +751,15 @@ void __put_task_struct(struct task_struc +@@ -743,6 +744,15 @@ void __put_task_struct(struct task_struc WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk == current); @@ -49,12 +49,12 @@ Signed-off-by: Sebastian Andrzej Siewior + /* Task is done with its stack. */ + put_task_stack(tsk); + + io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); - security_task_free(tsk); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3254,15 +3254,6 @@ static struct rq *finish_task_switch(str +@@ -4284,15 +4284,6 @@ static struct rq *finish_task_switch(str if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); diff --git a/kernel/patches-5.4.x-rt/0144-sched-might-sleep-do-not-account-rcu-depth.patch b/kernel/patches-5.11.x-rt/0138-sched-might-sleep-do-not-account-rcu-depth.patch similarity index 89% rename from kernel/patches-5.4.x-rt/0144-sched-might-sleep-do-not-account-rcu-depth.patch rename to kernel/patches-5.11.x-rt/0138-sched-might-sleep-do-not-account-rcu-depth.patch index fd8836978..3b80a729d 100644 --- a/kernel/patches-5.4.x-rt/0144-sched-might-sleep-do-not-account-rcu-depth.patch +++ b/kernel/patches-5.11.x-rt/0138-sched-might-sleep-do-not-account-rcu-depth.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h -@@ -51,6 +51,11 @@ void __rcu_read_unlock(void); +@@ -52,6 +52,11 @@ void __rcu_read_unlock(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
*/ #define rcu_preempt_depth() (current->rcu_read_lock_nesting) @@ -25,7 +25,7 @@ Signed-off-by: Thomas Gleixner #else /* #ifdef CONFIG_PREEMPT_RCU */ -@@ -69,6 +74,8 @@ static inline int rcu_preempt_depth(void +@@ -77,6 +82,8 @@ static inline int rcu_preempt_depth(void return 0; } @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner /* Internal to kernel */ --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6771,7 +6771,7 @@ void __init sched_init(void) +@@ -7958,7 +7958,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { diff --git a/kernel/patches-5.4.x-rt/0145-sched-disable-ttwu-queue.patch b/kernel/patches-5.11.x-rt/0139-sched-disable-ttwu-queue.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0145-sched-disable-ttwu-queue.patch rename to kernel/patches-5.11.x-rt/0139-sched-disable-ttwu-queue.patch diff --git a/kernel/patches-5.4.x-rt/0147-softirq-preempt-fix-3-re.patch b/kernel/patches-5.11.x-rt/0140-softirq-preempt-fix-3-re.patch similarity index 79% rename from kernel/patches-5.4.x-rt/0147-softirq-preempt-fix-3-re.patch rename to kernel/patches-5.11.x-rt/0140-softirq-preempt-fix-3-re.patch index a688e57b7..ec00a132b 100644 --- a/kernel/patches-5.4.x-rt/0147-softirq-preempt-fix-3-re.patch +++ b/kernel/patches-5.11.x-rt/0140-softirq-preempt-fix-3-re.patch @@ -14,33 +14,14 @@ Reported-by: Carsten Emde Signed-off-by: Thomas Gleixner --- - block/blk-softirq.c | 2 ++ include/linux/preempt.h | 3 +++ lib/irq_poll.c | 5 +++++ net/core/dev.c | 7 +++++++ - 4 files changed, 17 insertions(+) + 3 files changed, 15 insertions(+) ---- a/block/blk-softirq.c -+++ b/block/blk-softirq.c -@@ -87,6 +87,7 @@ static int blk_softirq_cpu_dead(unsigned - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); -+ preempt_check_resched_rt(); - - return 0; - } -@@ -138,6 +139,7 @@ void __blk_complete_request(struct reque - goto do_local; - - local_irq_restore(flags); -+ preempt_check_resched_rt(); - } - - static __init int blk_softirq_init(void) --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -189,8 +189,10 @@ do { \ +@@ -190,8 +190,10 @@ do { \ #ifdef CONFIG_PREEMPT_RT # define preempt_enable_no_resched() sched_preempt_enable_no_resched() @@ -51,7 +32,7 @@ Signed-off-by: Thomas Gleixner #endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) -@@ -261,6 +263,7 @@ do { \ +@@ -262,6 +264,7 @@ do { \ #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() @@ -103,7 +84,7 @@ Signed-off-by: Thomas Gleixner } --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -2674,6 +2674,7 @@ static void __netif_reschedule(struct Qd +@@ -3041,6 +3041,7 @@ static void __netif_reschedule(struct Qd sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -111,7 +92,7 @@ Signed-off-by: Thomas Gleixner } void __netif_schedule(struct Qdisc *q) -@@ -2736,6 +2737,7 @@ void __dev_kfree_skb_irq(struct sk_buff +@@ -3103,6 +3104,7 @@ void __dev_kfree_skb_irq(struct sk_buff __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -119,7 +100,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -4198,6 +4200,7 @@ static int enqueue_to_backlog(struct sk_ +@@ -4570,6 +4572,7 @@ static int enqueue_to_backlog(struct sk_ rps_unlock(sd); local_irq_restore(flags); @@ -127,7 
+108,7 @@ Signed-off-by: Thomas Gleixner atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -5822,12 +5825,14 @@ static void net_rps_action_and_irq_enabl +@@ -6288,12 +6291,14 @@ static void net_rps_action_and_irq_enabl sd->rps_ipi_list = NULL; local_irq_enable(); @@ -142,7 +123,7 @@ Signed-off-by: Thomas Gleixner } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -5905,6 +5910,7 @@ void __napi_schedule(struct napi_struct +@@ -6371,6 +6376,7 @@ void __napi_schedule(struct napi_struct local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -150,7 +131,7 @@ Signed-off-by: Thomas Gleixner } EXPORT_SYMBOL(__napi_schedule); -@@ -9857,6 +9863,7 @@ static int dev_cpu_dead(unsigned int old +@@ -10980,6 +10986,7 @@ static int dev_cpu_dead(unsigned int old raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); diff --git a/kernel/patches-5.4.x-rt/0148-softirq-disable-softirq-stacks-for-rt.patch b/kernel/patches-5.11.x-rt/0141-softirq-disable-softirq-stacks-for-rt.patch similarity index 82% rename from kernel/patches-5.4.x-rt/0148-softirq-disable-softirq-stacks-for-rt.patch rename to kernel/patches-5.11.x-rt/0141-softirq-disable-softirq-stacks-for-rt.patch index fa2157ca9..4c06e1e27 100644 --- a/kernel/patches-5.4.x-rt/0148-softirq-disable-softirq-stacks-for-rt.patch +++ b/kernel/patches-5.11.x-rt/0141-softirq-disable-softirq-stacks-for-rt.patch @@ -12,14 +12,14 @@ Signed-off-by: Thomas Gleixner arch/powerpc/kernel/misc_64.S | 2 ++ arch/sh/kernel/irq.c | 2 ++ arch/sparc/kernel/irq_64.c | 2 ++ - arch/x86/entry/entry_64.S | 2 ++ arch/x86/kernel/irq_32.c | 2 ++ + arch/x86/kernel/irq_64.c | 2 ++ include/linux/interrupt.h | 2 +- 8 files changed, 15 insertions(+), 1 deletion(-) --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -679,10 +679,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most +@@ -753,10 +753,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most void *softirq_ctx[NR_CPUS] __read_mostly; void *hardirq_ctx[NR_CPUS] __read_mostly; @@ -34,7 +34,7 @@ Signed-off-by: Thomas Gleixner { --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S -@@ -37,6 +37,7 @@ +@@ -31,6 +31,7 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ @@ -42,7 +42,7 @@ Signed-off-by: Thomas Gleixner _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) -@@ -52,6 +53,7 @@ +@@ -46,6 +47,7 @@ stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr @@ -104,24 +104,6 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) ---- a/arch/x86/entry/entry_64.S -+++ b/arch/x86/entry/entry_64.S -@@ -1074,6 +1074,7 @@ EXPORT_SYMBOL(native_load_gs_index) - jmp 2b - .previous - -+#ifndef CONFIG_PREEMPT_RT - /* Call softirq on interrupt stack. Interrupts are off. 
*/ - ENTRY(do_softirq_own_stack) - pushq %rbp -@@ -1084,6 +1085,7 @@ ENTRY(do_softirq_own_stack) - leaveq - ret - ENDPROC(do_softirq_own_stack) -+#endif - - #ifdef CONFIG_XEN_PV - idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -131,6 +131,7 @@ int irq_init_percpu_irqstack(unsigned in @@ -138,11 +120,23 @@ Signed-off-by: Thomas Gleixner } +#endif - void handle_irq(struct irq_desc *desc, struct pt_regs *regs) + void __handle_irq(struct irq_desc *desc, struct pt_regs *regs) { +--- a/arch/x86/kernel/irq_64.c ++++ b/arch/x86/kernel/irq_64.c +@@ -72,7 +72,9 @@ int irq_init_percpu_irqstack(unsigned in + return map_irq_stack(cpu); + } + ++#ifndef CONFIG_PREEMPT_RT + void do_softirq_own_stack(void) + { + run_on_irqstack_cond(__do_softirq, NULL); + } ++#endif --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -546,7 +546,7 @@ struct softirq_action +@@ -569,7 +569,7 @@ struct softirq_action asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); diff --git a/kernel/patches-5.4.x-rt/0149-net-core-use-local_bh_disable-in-netif_rx_ni.patch b/kernel/patches-5.11.x-rt/0142-net-core-use-local_bh_disable-in-netif_rx_ni.patch similarity index 94% rename from kernel/patches-5.4.x-rt/0149-net-core-use-local_bh_disable-in-netif_rx_ni.patch rename to kernel/patches-5.11.x-rt/0142-net-core-use-local_bh_disable-in-netif_rx_ni.patch index f1c9d303c..d3d47d8d4 100644 --- a/kernel/patches-5.4.x-rt/0149-net-core-use-local_bh_disable-in-netif_rx_ni.patch +++ b/kernel/patches-5.11.x-rt/0142-net-core-use-local_bh_disable-in-netif_rx_ni.patch @@ -18,7 +18,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4471,11 +4471,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -4844,11 +4844,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); diff --git a/kernel/patches-5.4.x-rt/0153-pid.h-include-atomic.h.patch b/kernel/patches-5.11.x-rt/0143-pid.h-include-atomic.h.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0153-pid.h-include-atomic.h.patch rename to kernel/patches-5.11.x-rt/0143-pid.h-include-atomic.h.patch diff --git a/kernel/patches-5.4.x-rt/0174-ptrace-fix-ptrace-vs-tasklist_lock-race.patch b/kernel/patches-5.11.x-rt/0144-ptrace-fix-ptrace-vs-tasklist_lock-race.patch similarity index 90% rename from kernel/patches-5.4.x-rt/0174-ptrace-fix-ptrace-vs-tasklist_lock-race.patch rename to kernel/patches-5.11.x-rt/0144-ptrace-fix-ptrace-vs-tasklist_lock-race.patch index 3edd93d42..3e504968f 100644 --- a/kernel/patches-5.4.x-rt/0174-ptrace-fix-ptrace-vs-tasklist_lock-race.patch +++ b/kernel/patches-5.11.x-rt/0144-ptrace-fix-ptrace-vs-tasklist_lock-race.patch @@ -30,7 +30,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -107,12 +107,8 @@ struct task_group; +@@ -112,12 +112,8 @@ struct io_uring_task; __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) @@ -40,10 +40,10 @@ Signed-off-by: Sebastian Andrzej Siewior -#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) - - #define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) -@@ -1772,6 +1768,51 @@ static inline int test_tsk_need_resched( + #ifdef CONFIG_DEBUG_ATOMIC_SLEEP + + /* +@@ -1881,6 +1877,51 @@ static inline int test_tsk_need_resched( return 
unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -115,7 +115,7 @@ Signed-off-by: Sebastian Andrzej Siewior spin_unlock_irq(&task->sighand->siglock); --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1900,6 +1900,18 @@ int migrate_swap(struct task_struct *cur +@@ -2598,6 +2598,18 @@ int migrate_swap(struct task_struct *cur } #endif /* CONFIG_NUMA_BALANCING */ @@ -134,7 +134,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -1944,7 +1956,7 @@ unsigned long wait_task_inactive(struct +@@ -2642,7 +2654,7 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -143,7 +143,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; cpu_relax(); } -@@ -1959,7 +1971,8 @@ unsigned long wait_task_inactive(struct +@@ -2657,7 +2669,8 @@ unsigned long wait_task_inactive(struct running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; diff --git a/kernel/patches-5.11.x-rt/0145-ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch b/kernel/patches-5.11.x-rt/0145-ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch new file mode 100644 index 000000000..eab426719 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0145-ptrace-fix-ptrace_unfreeze_traced-race-with-rt-lock.patch @@ -0,0 +1,57 @@ +From: Oleg Nesterov +Date: Tue, 3 Nov 2020 12:39:01 +0100 +Subject: [PATCH] ptrace: fix ptrace_unfreeze_traced() race with rt-lock + +The patch "ptrace: fix ptrace vs tasklist_lock race" changed +ptrace_freeze_traced() to take task->saved_state into account, but +ptrace_unfreeze_traced() has the same problem and needs a similar fix: +it should check/update both ->state and ->saved_state. + +Reported-by: Luis Claudio R. Goncalves +Fixes: "ptrace: fix ptrace vs tasklist_lock race" +Signed-off-by: Oleg Nesterov +Signed-off-by: Sebastian Andrzej Siewior +Cc: stable-rt@vger.kernel.org +--- + kernel/ptrace.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +--- a/kernel/ptrace.c ++++ b/kernel/ptrace.c +@@ -197,8 +197,8 @@ static bool ptrace_freeze_traced(struct + + static void ptrace_unfreeze_traced(struct task_struct *task) + { +- if (task->state != __TASK_TRACED) +- return; ++ unsigned long flags; ++ bool frozen = true; + + WARN_ON(!task->ptrace || task->parent != current); + +@@ -207,12 +207,19 @@ static void ptrace_unfreeze_traced(struc + * Recheck state under the lock to close this race. 
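Both ptrace fixes hinge on the RT state model: when a task blocks on a sleeping spinlock, its ->state is stashed in ->saved_state and restored on wakeup, so a tracer that inspects ->state alone can misjudge the tracee. Condensed to a sketch, with a hypothetical helper name; the real checks live in the hunks above and in the unfreeze fix below, and ->pi_lock serializes against the wakeup code:

#include <linux/sched.h>

static bool demo_task_traced(struct task_struct *task)
{
        unsigned long flags;
        bool traced;

        raw_spin_lock_irqsave(&task->pi_lock, flags);
        traced = task->state == __TASK_TRACED ||
                 task->saved_state == __TASK_TRACED;    /* RT-only field */
        raw_spin_unlock_irqrestore(&task->pi_lock, flags);

        return traced;
}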
+ */ + spin_lock_irq(&task->sighand->siglock); +- if (task->state == __TASK_TRACED) { +- if (__fatal_signal_pending(task)) +- wake_up_state(task, __TASK_TRACED); +- else +- task->state = TASK_TRACED; +- } ++ ++ raw_spin_lock_irqsave(&task->pi_lock, flags); ++ if (task->state == __TASK_TRACED) ++ task->state = TASK_TRACED; ++ else if (task->saved_state == __TASK_TRACED) ++ task->saved_state = TASK_TRACED; ++ else ++ frozen = false; ++ raw_spin_unlock_irqrestore(&task->pi_lock, flags); ++ ++ if (frozen && __fatal_signal_pending(task)) ++ wake_up_state(task, __TASK_TRACED); ++ + spin_unlock_irq(&task->sighand->siglock); + } + diff --git a/kernel/patches-5.4.x-rt/0184-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch b/kernel/patches-5.11.x-rt/0146-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch similarity index 53% rename from kernel/patches-5.4.x-rt/0184-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch rename to kernel/patches-5.11.x-rt/0146-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch index 6da2b5e10..84c66fb9d 100644 --- a/kernel/patches-5.4.x-rt/0184-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch +++ b/kernel/patches-5.11.x-rt/0146-locking-Make-spinlock_t-and-rwlock_t-a-RCU-section-o.patch @@ -16,101 +16,101 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c -@@ -1141,6 +1141,7 @@ void __sched rt_spin_lock_slowunlock(str - void __lockfunc rt_spin_lock(spinlock_t *lock) +@@ -1136,6 +1136,7 @@ void __lockfunc rt_spin_lock(spinlock_t { - sleeping_lock_inc(); -+ rcu_read_lock(); - migrate_disable(); spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -@@ -1156,6 +1157,7 @@ void __lockfunc __rt_spin_lock(struct rt - void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) - { - sleeping_lock_inc(); + rcu_read_lock(); migrate_disable(); + } + EXPORT_SYMBOL(rt_spin_lock); +@@ -1150,6 +1151,7 @@ void __lockfunc rt_spin_lock_nested(spin + { spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); -@@ -1169,6 +1171,7 @@ void __lockfunc rt_spin_unlock(spinlock_ - spin_release(&lock->dep_map, 1, _RET_IP_); - rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++ rcu_read_lock(); + migrate_disable(); + } + EXPORT_SYMBOL(rt_spin_lock_nested); +@@ -1159,6 +1161,7 @@ void __lockfunc rt_spin_lock_nest_lock(s + { + spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ rcu_read_lock(); + migrate_disable(); + } + EXPORT_SYMBOL(rt_spin_lock_nest_lock); +@@ -1169,6 +1172,7 @@ void __lockfunc rt_spin_unlock(spinlock_ + /* NOTE: we always pass in '1' for nested, for simplicity */ + spin_release(&lock->dep_map, _RET_IP_); migrate_enable(); + rcu_read_unlock(); - sleeping_lock_dec(); + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); } EXPORT_SYMBOL(rt_spin_unlock); -@@ -1200,6 +1203,7 @@ int __lockfunc rt_spin_trylock(spinlock_ +@@ -1198,6 +1202,7 @@ int __lockfunc rt_spin_trylock(spinlock_ ret = __rt_mutex_trylock(&lock->lock); if (ret) { spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); -+ rcu_read_lock(); - } else { - migrate_enable(); - sleeping_lock_dec(); -@@ -1216,6 +1220,7 @@ int __lockfunc rt_spin_trylock_bh(spinlo - ret = __rt_mutex_trylock(&lock->lock); - if (ret) { - sleeping_lock_inc(); + rcu_read_lock(); migrate_disable(); - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - } else -@@ 
-1232,6 +1237,7 @@ int __lockfunc rt_spin_trylock_irqsave(s - ret = __rt_mutex_trylock(&lock->lock); - if (ret) { - sleeping_lock_inc(); -+ rcu_read_lock(); - migrate_disable(); - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); } + return ret; +@@ -1212,6 +1217,7 @@ int __lockfunc rt_spin_trylock_bh(spinlo + ret = __rt_mutex_trylock(&lock->lock); + if (ret) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ rcu_read_lock(); + migrate_disable(); + } else { + local_bh_enable(); --- a/kernel/locking/rwlock-rt.c +++ b/kernel/locking/rwlock-rt.c -@@ -310,6 +310,7 @@ int __lockfunc rt_read_trylock(rwlock_t - ret = do_read_rt_trylock(rwlock); +@@ -270,6 +270,7 @@ int __lockfunc rt_read_trylock(rwlock_t + ret = __read_rt_trylock(rwlock); if (ret) { rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); + rcu_read_lock(); - } else { - migrate_enable(); - sleeping_lock_dec(); -@@ -327,6 +328,7 @@ int __lockfunc rt_write_trylock(rwlock_t - ret = do_write_rt_trylock(rwlock); + migrate_disable(); + } + return ret; +@@ -283,6 +284,7 @@ int __lockfunc rt_write_trylock(rwlock_t + ret = __write_rt_trylock(rwlock); if (ret) { rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); + rcu_read_lock(); - } else { - migrate_enable(); - sleeping_lock_dec(); -@@ -338,6 +340,7 @@ EXPORT_SYMBOL(rt_write_trylock); - void __lockfunc rt_read_lock(rwlock_t *rwlock) + migrate_disable(); + } + return ret; +@@ -293,6 +295,7 @@ void __lockfunc rt_read_lock(rwlock_t *r { - sleeping_lock_inc(); -+ rcu_read_lock(); - migrate_disable(); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); - do_read_rt_lock(rwlock); -@@ -347,6 +350,7 @@ EXPORT_SYMBOL(rt_read_lock); - void __lockfunc rt_write_lock(rwlock_t *rwlock) - { - sleeping_lock_inc(); + __read_rt_lock(rwlock); + rcu_read_lock(); migrate_disable(); + } + EXPORT_SYMBOL(rt_read_lock); +@@ -301,6 +304,7 @@ void __lockfunc rt_write_lock(rwlock_t * + { rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); - do_write_rt_lock(rwlock); -@@ -358,6 +362,7 @@ void __lockfunc rt_read_unlock(rwlock_t - rwlock_release(&rwlock->dep_map, 1, _RET_IP_); - do_read_rt_unlock(rwlock); + __write_rt_lock(rwlock); ++ rcu_read_lock(); + migrate_disable(); + } + EXPORT_SYMBOL(rt_write_lock); +@@ -309,6 +313,7 @@ void __lockfunc rt_read_unlock(rwlock_t + { + rwlock_release(&rwlock->dep_map, _RET_IP_); migrate_enable(); + rcu_read_unlock(); - sleeping_lock_dec(); + __read_rt_unlock(rwlock); } EXPORT_SYMBOL(rt_read_unlock); -@@ -367,6 +372,7 @@ void __lockfunc rt_write_unlock(rwlock_t - rwlock_release(&rwlock->dep_map, 1, _RET_IP_); - do_write_rt_unlock(rwlock); +@@ -317,6 +322,7 @@ void __lockfunc rt_write_unlock(rwlock_t + { + rwlock_release(&rwlock->dep_map, _RET_IP_); migrate_enable(); + rcu_read_unlock(); - sleeping_lock_dec(); + __write_rt_unlock(rwlock); } EXPORT_SYMBOL(rt_write_unlock); diff --git a/kernel/patches-5.4.x-rt/0188-rcutorture-Avoid-problematic-critical-section-nestin.patch b/kernel/patches-5.11.x-rt/0147-rcutorture-Avoid-problematic-critical-section-nestin.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0188-rcutorture-Avoid-problematic-critical-section-nestin.patch rename to kernel/patches-5.11.x-rt/0147-rcutorture-Avoid-problematic-critical-section-nestin.patch index 27047f052..c908297d7 100644 --- a/kernel/patches-5.4.x-rt/0188-rcutorture-Avoid-problematic-critical-section-nestin.patch +++ b/kernel/patches-5.11.x-rt/0147-rcutorture-Avoid-problematic-critical-section-nestin.patch @@ -37,12 +37,12 @@ happening elsewhere. 
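What the rcu_read_lock()/rcu_read_unlock() pairs added to the rt_spin_*/rt_read_*/rt_write_* ops above preserve: on !RT, spin_lock() disables preemption and is therefore implicitly an RCU read-side section, and a fair amount of code depends on that. A sketch of the dependent pattern with hypothetical names; on RT, without this patch, the traversal below could race with a concurrent synchronize_rcu():

#include <linux/rculist.h>
#include <linux/spinlock.h>

struct demo_entry {
        struct list_head node;
        int val;
};

static DEFINE_SPINLOCK(demo_lock);      /* protects the reader's own state,
                                         * not the list updates */
static LIST_HEAD(demo_list);

static int demo_sum(void)
{
        struct demo_entry *e;
        int sum = 0;

        spin_lock(&demo_lock);          /* now also an RCU section on RT */
        list_for_each_entry_rcu(e, &demo_list, node)
                sum += e->val;          /* entries freed via synchronize_rcu()
                                         * elsewhere stay valid meanwhile */
        spin_unlock(&demo_lock);

        return sum;
}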
Signed-off-by: Scott Wood Signed-off-by: Sebastian Andrzej Siewior --- - kernel/rcu/rcutorture.c | 96 +++++++++++++++++++++++++++++++++++++++++------- - 1 file changed, 82 insertions(+), 14 deletions(-) + kernel/rcu/rcutorture.c | 97 +++++++++++++++++++++++++++++++++++++++++------- + 1 file changed, 83 insertions(+), 14 deletions(-) --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -60,10 +60,13 @@ MODULE_AUTHOR("Paul E. McKenney #define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */ /* Must be power of two minus one. */ #define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3) -@@ -1152,31 +1155,52 @@ static void rcutorture_one_extend(int *r +@@ -1250,31 +1253,53 @@ static void rcutorture_one_extend(int *r WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); rtrsp->rt_readstate = newstate; @@ -115,10 +115,11 @@ Signed-off-by: Sebastian Andrzej Siewior + local_bh_enable(); + if (statesold & RCUTORTURE_RDR_RBH) + rcu_read_unlock_bh(); - if (statesold & RCUTORTURE_RDR_RCU) - cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT); ++ + if (statesold & RCUTORTURE_RDR_RCU) { + bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); -@@ -1212,6 +1236,12 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1317,6 +1342,12 @@ rcutorture_extend_mask(int oldmask, stru int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; @@ -131,7 +132,7 @@ Signed-off-by: Sebastian Andrzej Siewior WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ -@@ -1219,11 +1249,49 @@ rcutorture_extend_mask(int oldmask, stru +@@ -1324,11 +1355,49 @@ rcutorture_extend_mask(int oldmask, stru mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); diff --git a/kernel/patches-5.4.x-rt/0191-mm-vmalloc-use-get-cpu-light.patch b/kernel/patches-5.11.x-rt/0148-mm-vmalloc-use-get-cpu-light.patch similarity index 79% rename from kernel/patches-5.4.x-rt/0191-mm-vmalloc-use-get-cpu-light.patch rename to kernel/patches-5.11.x-rt/0148-mm-vmalloc-use-get-cpu-light.patch index 4743a474f..a8e244ba7 100644 --- a/kernel/patches-5.4.x-rt/0191-mm-vmalloc-use-get-cpu-light.patch +++ b/kernel/patches-5.11.x-rt/0148-mm-vmalloc-use-get-cpu-light.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/mm/vmalloc.c +++ b/mm/vmalloc.c -@@ -1462,7 +1462,7 @@ static void *new_vmap_block(unsigned int +@@ -1558,7 +1558,7 @@ static void *new_vmap_block(unsigned int struct vmap_block *vb; struct vmap_area *va; unsigned long vb_idx; @@ -21,9 +21,9 @@ Signed-off-by: Thomas Gleixner void *vaddr; node = numa_node_id(); -@@ -1505,11 +1505,12 @@ static void *new_vmap_block(unsigned int - BUG_ON(err); - radix_tree_preload_end(); +@@ -1595,11 +1595,12 @@ static void *new_vmap_block(unsigned int + return ERR_PTR(err); + } - vbq = &get_cpu_var(vmap_block_queue); + cpu = get_cpu_light(); @@ -36,7 +36,7 @@ Signed-off-by: Thomas Gleixner return vaddr; } -@@ -1578,6 +1579,7 @@ static void *vb_alloc(unsigned long size +@@ -1664,6 +1665,7 @@ static void *vb_alloc(unsigned long size struct vmap_block *vb; void *vaddr = NULL; unsigned int order; @@ -44,7 +44,7 @@ Signed-off-by: Thomas Gleixner BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); -@@ -1592,7 +1594,8 @@ static void *vb_alloc(unsigned long size +@@ -1678,7 +1680,8 @@ static void *vb_alloc(unsigned long size order = get_order(size); rcu_read_lock(); @@ -54,7 +54,7 @@ Signed-off-by: Thomas 
Gleixner list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; -@@ -1615,7 +1618,7 @@ static void *vb_alloc(unsigned long size +@@ -1701,7 +1704,7 @@ static void *vb_alloc(unsigned long size break; } diff --git a/kernel/patches-5.4.x-rt/0192-block-mq-drop-preempt-disable.patch b/kernel/patches-5.11.x-rt/0149-block-mq-drop-preempt-disable.patch similarity index 58% rename from kernel/patches-5.4.x-rt/0192-block-mq-drop-preempt-disable.patch rename to kernel/patches-5.11.x-rt/0149-block-mq-drop-preempt-disable.patch index 49b453489..596839691 100644 --- a/kernel/patches-5.4.x-rt/0192-block-mq-drop-preempt-disable.patch +++ b/kernel/patches-5.11.x-rt/0149-block-mq-drop-preempt-disable.patch @@ -8,30 +8,12 @@ It seems to be enough to replace it with get_cpu_light() and migrate_disable(). Signed-off-by: Sebastian Andrzej Siewior --- - block/blk-mq.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) + block/blk-mq.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -611,7 +611,7 @@ static void __blk_mq_complete_request(st - return; - } - -- cpu = get_cpu(); -+ cpu = get_cpu_light(); - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) - shared = cpus_share_cache(cpu, ctx->cpu); - -@@ -623,7 +623,7 @@ static void __blk_mq_complete_request(st - } else { - q->mq_ops->complete(rq); - } -- put_cpu(); -+ put_cpu_light(); - } - - static void hctx_unlock(struct blk_mq_hw_ctx *hctx, int srcu_idx) -@@ -1466,14 +1466,14 @@ static void __blk_mq_delay_run_hw_queue( +@@ -1560,14 +1560,14 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { diff --git a/kernel/patches-5.4.x-rt/0194-md-raid5-percpu-handling-rt-aware.patch b/kernel/patches-5.11.x-rt/0150-md-raid5-percpu-handling-rt-aware.patch similarity index 88% rename from kernel/patches-5.4.x-rt/0194-md-raid5-percpu-handling-rt-aware.patch rename to kernel/patches-5.11.x-rt/0150-md-raid5-percpu-handling-rt-aware.patch index 0cda18d49..f5ac87307 100644 --- a/kernel/patches-5.4.x-rt/0194-md-raid5-percpu-handling-rt-aware.patch +++ b/kernel/patches-5.11.x-rt/0150-md-raid5-percpu-handling-rt-aware.patch @@ -20,7 +20,7 @@ Tested-by: Udo van den Heuvel --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c -@@ -2058,8 +2058,9 @@ static void raid_run_ops(struct stripe_h +@@ -2216,8 +2216,9 @@ static void raid_run_ops(struct stripe_h struct raid5_percpu *percpu; unsigned long cpu; @@ -31,7 +31,7 @@ Tested-by: Udo van den Heuvel if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; -@@ -2118,7 +2119,8 @@ static void raid_run_ops(struct stripe_h +@@ -2276,7 +2277,8 @@ static void raid_run_ops(struct stripe_h if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } @@ -41,7 +41,7 @@ Tested-by: Udo van den Heuvel } static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) -@@ -6821,6 +6823,7 @@ static int raid456_cpu_up_prepare(unsign +@@ -7097,6 +7099,7 @@ static int raid456_cpu_up_prepare(unsign __func__, cpu); return -ENOMEM; } @@ -51,7 +51,7 @@ Tested-by: Udo van den Heuvel --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h -@@ -634,6 +634,7 @@ struct r5conf { +@@ -635,6 +635,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { diff --git a/kernel/patches-5.4.x-rt/0195-scsi-fcoe-rt-aware.patch b/kernel/patches-5.11.x-rt/0151-scsi-fcoe-rt-aware.patch similarity index 93% rename from 
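The vmalloc and blk-mq conversions follow one recipe: get_cpu() pins the task by disabling preemption, which on RT forbids the sleeping locks taken inside the section, while get_cpu_light() only disables migration, keeping the CPU number stable and the section preemptible. A minimal sketch, assuming the RT series; the per-CPU payload is hypothetical and per-CPU lock initialization is omitted for brevity:

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct demo_pcpu {
        spinlock_t lock;                /* sleeping lock on RT */
        unsigned long count;
};
static DEFINE_PER_CPU(struct demo_pcpu, demo_pcpu);

static void demo_account(void)
{
        struct demo_pcpu *p;
        int cpu;

        cpu = get_cpu_light();          /* migrate_disable() on RT */
        p = per_cpu_ptr(&demo_pcpu, cpu);
        spin_lock(&p->lock);            /* legal: still preemptible */
        p->count++;
        spin_unlock(&p->lock);
        put_cpu_light();
}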
kernel/patches-5.4.x-rt/0195-scsi-fcoe-rt-aware.patch rename to kernel/patches-5.11.x-rt/0151-scsi-fcoe-rt-aware.patch index 983588961..57b40ee0a 100644 --- a/kernel/patches-5.4.x-rt/0195-scsi-fcoe-rt-aware.patch +++ b/kernel/patches-5.11.x-rt/0151-scsi-fcoe-rt-aware.patch @@ -70,7 +70,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c -@@ -826,7 +826,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -828,7 +828,7 @@ static unsigned long fcoe_ctlr_age_fcfs( INIT_LIST_HEAD(&del_list); @@ -79,7 +79,7 @@ Signed-off-by: Thomas Gleixner list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; -@@ -862,7 +862,7 @@ static unsigned long fcoe_ctlr_age_fcfs( +@@ -864,7 +864,7 @@ static unsigned long fcoe_ctlr_age_fcfs( sel_time = fcf->time; } } @@ -90,7 +90,7 @@ Signed-off-by: Thomas Gleixner /* Removes fcf from current list */ --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c -@@ -821,10 +821,10 @@ static struct fc_exch *fc_exch_em_alloc( +@@ -825,10 +825,10 @@ static struct fc_exch *fc_exch_em_alloc( } memset(ep, 0, sizeof(*ep)); diff --git a/kernel/patches-5.4.x-rt/0196-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch b/kernel/patches-5.11.x-rt/0152-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch similarity index 94% rename from kernel/patches-5.4.x-rt/0196-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch rename to kernel/patches-5.11.x-rt/0152-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch index 2e7690036..41d42ecdb 100644 --- a/kernel/patches-5.4.x-rt/0196-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch +++ b/kernel/patches-5.11.x-rt/0152-sunrpc-make-svc_xprt_do_enqueue-use-get_cpu_light.patch @@ -33,7 +33,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c -@@ -411,7 +411,7 @@ void svc_xprt_do_enqueue(struct svc_xprt +@@ -422,7 +422,7 @@ void svc_xprt_do_enqueue(struct svc_xprt if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) return; @@ -42,7 +42,7 @@ Signed-off-by: Sebastian Andrzej Siewior pool = svc_pool_for_cpu(xprt->xpt_server, cpu); atomic_long_inc(&pool->sp_stats.packets); -@@ -435,7 +435,7 @@ void svc_xprt_do_enqueue(struct svc_xprt +@@ -446,7 +446,7 @@ void svc_xprt_do_enqueue(struct svc_xprt rqstp = NULL; out_unlock: rcu_read_unlock(); diff --git a/kernel/patches-5.4.x-rt/0197-rt-introduce-cpu-chill.patch b/kernel/patches-5.11.x-rt/0153-rt-introduce-cpu-chill.patch similarity index 89% rename from kernel/patches-5.4.x-rt/0197-rt-introduce-cpu-chill.patch rename to kernel/patches-5.11.x-rt/0153-rt-introduce-cpu-chill.patch index 3ce8cf43a..07bc09e92 100644 --- a/kernel/patches-5.4.x-rt/0197-rt-introduce-cpu-chill.patch +++ b/kernel/patches-5.11.x-rt/0153-rt-introduce-cpu-chill.patch @@ -55,15 +55,15 @@ Signed-off-by: Thomas Gleixner Signed-off-by: Steven Rostedt Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/delay.h | 6 ++++++ - kernel/time/hrtimer.c | 32 ++++++++++++++++++++++++++++++++ - 2 files changed, 38 insertions(+) + include/linux/hrtimer.h | 6 ++++++ + kernel/time/hrtimer.c | 30 ++++++++++++++++++++++++++++++ + 2 files changed, 36 insertions(+) ---- a/include/linux/delay.h -+++ b/include/linux/delay.h -@@ -65,4 +65,10 @@ static inline void ssleep(unsigned int s - msleep(seconds * 1000); - } +--- a/include/linux/hrtimer.h ++++ b/include/linux/hrtimer.h +@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu); + #define 
hrtimers_dead_cpu NULL + #endif +#ifdef CONFIG_PREEMPT_RT +extern void cpu_chill(void); @@ -71,10 +71,10 @@ Signed-off-by: Sebastian Andrzej Siewior +# define cpu_chill() cpu_relax() +#endif + - #endif /* defined(_LINUX_DELAY_H) */ + #endif --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c -@@ -1982,6 +1982,38 @@ SYSCALL_DEFINE2(nanosleep_time32, struct +@@ -1988,6 +1988,36 @@ SYSCALL_DEFINE2(nanosleep_time32, struct } #endif @@ -96,9 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior + chill_time = ktime_set(0, NSEC_PER_MSEC); + + current->flags |= PF_NOFREEZE; -+ sleeping_lock_inc(); + schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD); -+ sleeping_lock_dec(); + if (!freeze_flag) + current->flags &= ~PF_NOFREEZE; + diff --git a/kernel/patches-5.4.x-rt/0199-fs-namespace-use-cpu-chill-in-trylock-loops.patch b/kernel/patches-5.11.x-rt/0154-fs-namespace-use-cpu-chill-in-trylock-loops.patch similarity index 97% rename from kernel/patches-5.4.x-rt/0199-fs-namespace-use-cpu-chill-in-trylock-loops.patch rename to kernel/patches-5.11.x-rt/0154-fs-namespace-use-cpu-chill-in-trylock-loops.patch index 7ad64c363..4ad9bb7f5 100644 --- a/kernel/patches-5.4.x-rt/0199-fs-namespace-use-cpu-chill-in-trylock-loops.patch +++ b/kernel/patches-5.11.x-rt/0154-fs-namespace-use-cpu-chill-in-trylock-loops.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner #include #include #include -+#include ++#include #include #include #include diff --git a/kernel/patches-5.4.x-rt/0201-debugobjects-rt.patch b/kernel/patches-5.11.x-rt/0155-debugobjects-rt.patch similarity index 94% rename from kernel/patches-5.4.x-rt/0201-debugobjects-rt.patch rename to kernel/patches-5.11.x-rt/0155-debugobjects-rt.patch index 6f0923f6d..8e5147775 100644 --- a/kernel/patches-5.4.x-rt/0201-debugobjects-rt.patch +++ b/kernel/patches-5.11.x-rt/0155-debugobjects-rt.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/debugobjects.c +++ b/lib/debugobjects.c -@@ -537,7 +537,10 @@ static void +@@ -557,7 +557,10 @@ static void struct debug_obj *obj; unsigned long flags; diff --git a/kernel/patches-5.11.x-rt/0156-skbufhead-raw-lock.patch b/kernel/patches-5.11.x-rt/0156-skbufhead-raw-lock.patch new file mode 100644 index 000000000..7fdb02ca0 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0156-skbufhead-raw-lock.patch @@ -0,0 +1,65 @@ +From: Thomas Gleixner +Date: Tue, 12 Jul 2011 15:38:34 +0200 +Subject: net: Use skbufhead with raw lock + +Use the rps lock as rawlock so we can keep irq-off regions. It looks low +latency. However we can't kfree() from this context therefore we defer this +to the softirq and use the tofree_queue list for it (similar to process_queue). 
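What cpu_chill() buys in the trylock loops the fs/namespace conversion above applies it to: with cpu_relax(), a high-priority RT task spinning on a trylock can livelock if the lock holder is preempted on the same CPU; sleeping for roughly a millisecond lets the holder make progress. A representative loop, with hypothetical lock names:

#include <linux/hrtimer.h>      /* cpu_chill() is declared here now */
#include <linux/spinlock.h>

static void demo_lock_pair(spinlock_t *outer, spinlock_t *inner)
{
        for (;;) {
                spin_lock(outer);
                if (spin_trylock(inner))
                        break;          /* got both, in order */
                spin_unlock(outer);
                cpu_chill();    /* RT: ~1ms hrtimer sleep; !RT: cpu_relax() */
        }
        /* ... work under both locks ... */
        spin_unlock(inner);
        spin_unlock(outer);
}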
+ +Signed-off-by: Thomas Gleixner +--- + include/linux/skbuff.h | 7 +++++++ + net/core/dev.c | 6 +++--- + 2 files changed, 10 insertions(+), 3 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -295,6 +295,7 @@ struct sk_buff_head { + + __u32 qlen; + spinlock_t lock; ++ raw_spinlock_t raw_lock; + }; + + struct sk_buff; +@@ -1890,6 +1891,12 @@ static inline void skb_queue_head_init(s + __skb_queue_head_init(list); + } + ++static inline void skb_queue_head_init_raw(struct sk_buff_head *list) ++{ ++ raw_spin_lock_init(&list->raw_lock); ++ __skb_queue_head_init(list); ++} ++ + static inline void skb_queue_head_init_class(struct sk_buff_head *list, + struct lock_class_key *class) + { +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -221,14 +221,14 @@ static inline struct hlist_head *dev_ind + static inline void rps_lock(struct softnet_data *sd) + { + #ifdef CONFIG_RPS +- spin_lock(&sd->input_pkt_queue.lock); ++ raw_spin_lock(&sd->input_pkt_queue.raw_lock); + #endif + } + + static inline void rps_unlock(struct softnet_data *sd) + { + #ifdef CONFIG_RPS +- spin_unlock(&sd->input_pkt_queue.lock); ++ raw_spin_unlock(&sd->input_pkt_queue.raw_lock); + #endif + } + +@@ -11314,7 +11314,7 @@ static int __init net_dev_init(void) + + INIT_WORK(flush, flush_backlog); + +- skb_queue_head_init(&sd->input_pkt_queue); ++ skb_queue_head_init_raw(&sd->input_pkt_queue); + skb_queue_head_init(&sd->process_queue); + #ifdef CONFIG_XFRM_OFFLOAD + skb_queue_head_init(&sd->xfrm_backlog); diff --git a/kernel/patches-5.11.x-rt/0157-net-Dequeue-in-dev_cpu_dead-without-the-lock.patch b/kernel/patches-5.11.x-rt/0157-net-Dequeue-in-dev_cpu_dead-without-the-lock.patch new file mode 100644 index 000000000..5227a7d52 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0157-net-Dequeue-in-dev_cpu_dead-without-the-lock.patch @@ -0,0 +1,30 @@ +From: Sebastian Andrzej Siewior +Date: Wed, 16 Sep 2020 16:15:39 +0200 +Subject: [PATCH] net: Dequeue in dev_cpu_dead() without the lock + +Upstream uses skb_dequeue() to acquire lock of `input_pkt_queue'. The reason is +to synchronize against a remote CPU which still thinks that the CPU is online +enqueues packets to this CPU. +There are no guarantees that the packet is enqueued before the callback is run, +it just hope. +RT however complains about an not initialized lock because it uses another lock +for `input_pkt_queue' due to the IRQ-off nature of the context. + +Use the unlocked dequeue version for `input_pkt_queue'. 
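The rationale for the unlocked dequeue, restated as code: after the raw_lock switch above, input_pkt_queue's embedded spinlock_t is never initialized, and the queue being drained belongs to an offline CPU, so no producer can race with the drain. A condensed, hypothetical rendering of the resulting hotplug path:

static void demo_drain_dead_cpu(struct softnet_data *oldsd)
{
        struct sk_buff *skb;

        /*
         * Exclusive access: the owning CPU is dead and remote CPUs no
         * longer enqueue here, so the lockless variant is safe and
         * avoids the uninitialized ->lock.
         */
        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
                netif_rx_ni(skb);       /* reinject on the surviving CPU */
}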
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -10998,7 +10998,7 @@ static int dev_cpu_dead(unsigned int old + netif_rx_ni(skb); + input_queue_head_incr(oldsd); + } +- while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { ++ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { + netif_rx_ni(skb); + input_queue_head_incr(oldsd); + } diff --git a/kernel/patches-5.4.x-rt/0203-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch b/kernel/patches-5.11.x-rt/0158-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch similarity index 94% rename from kernel/patches-5.4.x-rt/0203-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch rename to kernel/patches-5.11.x-rt/0158-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch index 30d6d3edf..b6a6afcfc 100644 --- a/kernel/patches-5.4.x-rt/0203-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch +++ b/kernel/patches-5.11.x-rt/0158-net-dev-always-take-qdisc-s-busylock-in-__dev_xmit_s.patch @@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -3407,7 +3407,11 @@ static inline int __dev_xmit_skb(struct +@@ -3777,7 +3777,11 @@ static inline int __dev_xmit_skb(struct * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ diff --git a/kernel/patches-5.4.x-rt/0204-irqwork-push_most_work_into_softirq_context.patch b/kernel/patches-5.11.x-rt/0159-irqwork-push_most_work_into_softirq_context.patch similarity index 59% rename from kernel/patches-5.4.x-rt/0204-irqwork-push_most_work_into_softirq_context.patch rename to kernel/patches-5.11.x-rt/0159-irqwork-push_most_work_into_softirq_context.patch index 48065fcb3..24c77ea88 100644 --- a/kernel/patches-5.4.x-rt/0204-irqwork-push_most_work_into_softirq_context.patch +++ b/kernel/patches-5.11.x-rt/0159-irqwork-push_most_work_into_softirq_context.patch @@ -21,27 +21,16 @@ Mike Galbraith, hard and soft variant] Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/irq_work.h | 8 ++++++ - kernel/irq_work.c | 59 +++++++++++++++++++++++++++++++++++++---------- - kernel/rcu/tree.c | 1 - kernel/sched/topology.c | 1 - kernel/time/tick-sched.c | 1 + include/linux/irq_work.h | 6 ++++ + kernel/irq_work.c | 69 ++++++++++++++++++++++++++++++++++++++--------- + kernel/sched/topology.c | 3 +- kernel/time/timer.c | 2 + - 6 files changed, 60 insertions(+), 12 deletions(-) + 4 files changed, 66 insertions(+), 14 deletions(-) --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h -@@ -18,6 +18,8 @@ - - /* Doesn't want IPI, wait for tick: */ - #define IRQ_WORK_LAZY BIT(2) -+/* Run hard IRQ context, even on RT */ -+#define IRQ_WORK_HARD_IRQ BIT(3) - - #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) - -@@ -52,4 +54,10 @@ static inline bool irq_work_needs_cpu(vo - static inline void irq_work_run(void) { } +@@ -64,4 +64,10 @@ static inline void irq_work_run(void) { + static inline void irq_work_single(void *arg) { } #endif +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT) @@ -61,51 +50,58 @@ Signed-off-by: Sebastian Andrzej Siewior #include -@@ -60,13 +61,19 @@ void __weak arch_irq_work_raise(void) +@@ -52,13 +53,27 @@ void __weak arch_irq_work_raise(void) /* Enqueue on current CPU, work must already be claimed and preempt disabled */ static void __irq_work_queue_local(struct irq_work *work) { -+ struct llist_head *list; -+ bool lazy_work, realtime = 
IS_ENABLED(CONFIG_PREEMPT_RT); -+ -+ lazy_work = work->flags & IRQ_WORK_LAZY; -+ - /* If the work is "lazy", handle it from next tick if any */ -- if (work->flags & IRQ_WORK_LAZY) { -- if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && +- /* If the work is "lazy", handle it from next tick if any */ +- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) { +- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) && - tick_nohz_tick_stopped()) - arch_irq_work_raise(); - } else { -- if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) -+ if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) +- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list))) ++ struct llist_head *list; ++ bool lazy_work; ++ int work_flags; ++ ++ work_flags = atomic_read(&work->node.a_flags); ++ if (work_flags & IRQ_WORK_LAZY) ++ lazy_work = true; ++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) && ++ !(work_flags & IRQ_WORK_HARD_IRQ)) ++ lazy_work = true; ++ else ++ lazy_work = false; ++ ++ if (lazy_work) + list = this_cpu_ptr(&lazy_list); + else + list = this_cpu_ptr(&raised_list); + -+ if (llist_add(&work->llnode, list)) { ++ if (llist_add(&work->node.llist, list)) { ++ /* If the work is "lazy", handle it from next tick if any */ + if (!lazy_work || tick_nohz_tick_stopped()) arch_irq_work_raise(); } } -@@ -108,9 +115,16 @@ bool irq_work_queue_on(struct irq_work * - - preempt_disable(); +@@ -102,7 +117,14 @@ bool irq_work_queue_on(struct irq_work * if (cpu != smp_processor_id()) { -+ struct llist_head *list; -+ /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); -- if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) -+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(work->flags & IRQ_WORK_HARD_IRQ)) -+ list = &per_cpu(lazy_list, cpu); -+ else -+ list = &per_cpu(raised_list, cpu); +- __smp_call_single_queue(cpu, &work->node.llist); + -+ if (llist_add(&work->llnode, list)) - arch_send_call_function_single_ipi(cpu); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) { ++ if (llist_add(&work->node.llist, &per_cpu(lazy_list, cpu))) ++ /* && tick_nohz_tick_stopped_cpu(cpu) */ ++ arch_send_call_function_single_ipi(cpu); ++ } else { ++ __smp_call_single_queue(cpu, &work->node.llist); ++ } } else { __irq_work_queue_local(work); -@@ -129,9 +143,8 @@ bool irq_work_needs_cpu(void) + } +@@ -120,9 +142,8 @@ bool irq_work_needs_cpu(void) raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); @@ -117,9 +113,9 @@ Signed-off-by: Sebastian Andrzej Siewior /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); -@@ -145,8 +158,12 @@ static void irq_work_run_list(struct lli +@@ -165,8 +186,12 @@ static void irq_work_run_list(struct lli + struct irq_work *work, *tmp; struct llist_node *llnode; - unsigned long flags; +#ifndef CONFIG_PREEMPT_RT + /* @@ -131,7 +127,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (llist_empty(list)) return; -@@ -178,7 +195,16 @@ static void irq_work_run_list(struct lli +@@ -182,7 +207,16 @@ static void irq_work_run_list(struct lli void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); @@ -149,7 +145,7 @@ Signed-off-by: Sebastian Andrzej Siewior } EXPORT_SYMBOL_GPL(irq_work_run); -@@ -188,8 +214,17 @@ void irq_work_tick(void) +@@ -192,8 +226,17 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); @@ -167,39 +163,21 @@ Signed-off-by: Sebastian Andrzej Siewior /* 
* Synchronize against the irq_work @entry, ensures the entry is not ---- a/kernel/rcu/tree.c -+++ b/kernel/rcu/tree.c -@@ -1097,6 +1097,7 @@ static int rcu_implicit_dynticks_qs(stru - !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && - (rnp->ffmask & rdp->grpmask)) { - init_irq_work(&rdp->rcu_iw, rcu_iw_handler); -+ rdp->rcu_iw.flags = IRQ_WORK_HARD_IRQ; - rdp->rcu_iw_pending = true; - rdp->rcu_iw_gp_seq = rnp->gp_seq; - irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c -@@ -502,6 +502,7 @@ static int init_rootdomain(struct root_d +@@ -526,7 +526,8 @@ static int init_rootdomain(struct root_d + #ifdef HAVE_RT_PUSH_IPI rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); - init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); -+ rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ; +- init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); ++// init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); ++ rd->rto_push_work = IRQ_WORK_INIT_HARD(rto_push_irq_work_func); #endif - init_dl_bw(&rd->dl_bw); ---- a/kernel/time/tick-sched.c -+++ b/kernel/time/tick-sched.c -@@ -239,6 +239,7 @@ static void nohz_full_kick_func(struct i - - static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { - .func = nohz_full_kick_func, -+ .flags = IRQ_WORK_HARD_IRQ, - }; - - /* + rd->visit_gen = 0; --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1783,6 +1783,8 @@ static __latent_entropy void run_timer_s +@@ -1743,6 +1743,8 @@ static __latent_entropy void run_timer_s { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); diff --git a/kernel/patches-5.4.x-rt/0205-x86-crypto-reduce-preempt-disabled-regions.patch b/kernel/patches-5.11.x-rt/0160-x86-crypto-reduce-preempt-disabled-regions.patch similarity index 89% rename from kernel/patches-5.4.x-rt/0205-x86-crypto-reduce-preempt-disabled-regions.patch rename to kernel/patches-5.11.x-rt/0160-x86-crypto-reduce-preempt-disabled-regions.patch index 16a3506d4..2136a6cb2 100644 --- a/kernel/patches-5.4.x-rt/0205-x86-crypto-reduce-preempt-disabled-regions.patch +++ b/kernel/patches-5.11.x-rt/0160-x86-crypto-reduce-preempt-disabled-regions.patch @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c -@@ -387,14 +387,14 @@ static int ecb_encrypt(struct skcipher_r +@@ -376,14 +376,14 @@ static int ecb_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -35,7 +35,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -409,14 +409,14 @@ static int ecb_decrypt(struct skcipher_r +@@ -398,14 +398,14 @@ static int ecb_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -52,7 +52,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -431,14 +431,14 @@ static int cbc_encrypt(struct skcipher_r +@@ -420,14 +420,14 @@ static int cbc_encrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -69,7 +69,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -453,14 +453,14 @@ static int cbc_decrypt(struct skcipher_r +@@ -442,14 +442,14 @@ static int cbc_decrypt(struct skcipher_r err = skcipher_walk_virt(&walk, req, true); @@ -86,7 +86,7 @@ Signed-off-by: Thomas Gleixner return err; } -@@ -510,18 +510,20 @@ static int ctr_crypt(struct skcipher_req +@@ -497,18 +497,20 @@ static int ctr_crypt(struct skcipher_req err = skcipher_walk_virt(&walk, req, true); diff --git a/kernel/patches-5.4.x-rt/0206-crypto-Reduce-preempt-disabled-regions-more-algos.patch 
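The aesni hunks above all make the same transformation: instead of one kernel_fpu_begin()/kernel_fpu_end() pair spanning the whole skcipher walk, including skcipher_walk_done(), which may fault or sleep, each chunk gets its own FPU section. The shape of the result, sketched with a hypothetical SIMD helper:

#include <asm/fpu/api.h>
#include <crypto/aes.h>
#include <crypto/internal/skcipher.h>

static void demo_simd_crypt(u8 *dst, const u8 *src, unsigned int len);
                                        /* hypothetical SIMD routine */

static int demo_walk_crypt(struct skcipher_walk *walk)
{
        unsigned int nbytes;
        int err = 0;

        while ((nbytes = walk->nbytes)) {
                unsigned int n = nbytes - (nbytes % AES_BLOCK_SIZE);

                kernel_fpu_begin();     /* was: once before the loop */
                demo_simd_crypt(walk->dst.virt.addr,
                                walk->src.virt.addr, n);
                kernel_fpu_end();       /* was: once after the loop */

                /* may sleep: must sit outside the FPU section on RT */
                err = skcipher_walk_done(walk, nbytes % AES_BLOCK_SIZE);
        }
        return err;
}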
b/kernel/patches-5.11.x-rt/0161-crypto-Reduce-preempt-disabled-regions-more-algos.patch similarity index 95% rename from kernel/patches-5.4.x-rt/0206-crypto-Reduce-preempt-disabled-regions-more-algos.patch rename to kernel/patches-5.11.x-rt/0161-crypto-Reduce-preempt-disabled-regions-more-algos.patch index e7d6ac914..83f60f155 100644 --- a/kernel/patches-5.4.x-rt/0206-crypto-Reduce-preempt-disabled-regions-more-algos.patch +++ b/kernel/patches-5.11.x-rt/0161-crypto-Reduce-preempt-disabled-regions-more-algos.patch @@ -172,7 +172,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* Start of the last block. */ src += nbytes / bsize - 1; dst += nbytes / bsize - 1; -@@ -147,10 +146,10 @@ int glue_cbc_decrypt_req_128bit(const st +@@ -148,10 +147,10 @@ int glue_cbc_decrypt_req_128bit(const st done: u128_xor(dst, dst, (u128 *)walk.iv); *(u128 *)walk.iv = last_iv; @@ -184,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -@@ -161,7 +160,7 @@ int glue_ctr_req_128bit(const struct com +@@ -162,7 +161,7 @@ int glue_ctr_req_128bit(const struct com void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; struct skcipher_walk walk; @@ -193,7 +193,7 @@ Signed-off-by: Sebastian Andrzej Siewior unsigned int nbytes; int err; -@@ -175,7 +174,7 @@ int glue_ctr_req_128bit(const struct com +@@ -176,7 +175,7 @@ int glue_ctr_req_128bit(const struct com le128 ctrblk; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, @@ -202,7 +202,7 @@ Signed-off-by: Sebastian Andrzej Siewior be128_to_le128(&ctrblk, (be128 *)walk.iv); -@@ -199,11 +198,10 @@ int glue_ctr_req_128bit(const struct com +@@ -202,11 +201,10 @@ int glue_ctr_req_128bit(const struct com } le128_to_be128((be128 *)walk.iv, &ctrblk); @@ -215,7 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (nbytes) { le128 ctrblk; u128 tmp; -@@ -301,8 +299,14 @@ int glue_xts_req_128bit(const struct com +@@ -306,8 +304,14 @@ int glue_xts_req_128bit(const struct com tweak_fn(tweak_ctx, walk.iv, walk.iv); while (nbytes) { diff --git a/kernel/patches-5.4.x-rt/0207-crypto-limit-more-FPU-enabled-sections.patch b/kernel/patches-5.11.x-rt/0162-crypto-limit-more-FPU-enabled-sections.patch similarity index 62% rename from kernel/patches-5.4.x-rt/0207-crypto-limit-more-FPU-enabled-sections.patch rename to kernel/patches-5.11.x-rt/0162-crypto-limit-more-FPU-enabled-sections.patch index c8bbb5c8c..22b3d6d53 100644 --- a/kernel/patches-5.4.x-rt/0207-crypto-limit-more-FPU-enabled-sections.patch +++ b/kernel/patches-5.11.x-rt/0162-crypto-limit-more-FPU-enabled-sections.patch @@ -28,57 +28,23 @@ performance. 
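kernel_fpu_resched(), added below, gives long-running FPU users a cheap preemption point: it drops and re-acquires the FPU section only when a reschedule is actually due. A usage sketch under that assumption; the loop body is hypothetical:

#include <asm/fpu/api.h>
#include <linux/kernel.h>

static void demo_simd_bulk(const u8 *src, u8 *dst, size_t len)
{
        kernel_fpu_begin();
        while (len) {
                size_t n = min_t(size_t, len, 4096);

                /* ... process n bytes with SIMD (hypothetical) ... */
                src += n;
                dst += n;
                len -= n;

                kernel_fpu_resched();   /* end + cond_resched() + begin,
                                         * but only if resched is due */
        }
        kernel_fpu_end();
}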
Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior --- - arch/x86/crypto/chacha_glue.c | 11 ++--------- arch/x86/include/asm/fpu/api.h | 1 + arch/x86/kernel/fpu/core.c | 12 ++++++++++++ - 3 files changed, 15 insertions(+), 9 deletions(-) + 2 files changed, 13 insertions(+) ---- a/arch/x86/crypto/chacha_glue.c -+++ b/arch/x86/crypto/chacha_glue.c -@@ -127,7 +127,6 @@ static int chacha_simd_stream_xor(struct - const struct chacha_ctx *ctx, const u8 *iv) - { - u32 *state, state_buf[16 + 2] __aligned(8); -- int next_yield = 4096; /* bytes until next FPU yield */ - int err = 0; - - BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16); -@@ -140,20 +139,14 @@ static int chacha_simd_stream_xor(struct - - if (nbytes < walk->total) { - nbytes = round_down(nbytes, walk->stride); -- next_yield -= nbytes; - } - - chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr, - nbytes, ctx->nrounds); - -- if (next_yield <= 0) { -- /* temporarily allow preemption */ -- kernel_fpu_end(); -- kernel_fpu_begin(); -- next_yield = 4096; -- } -- -+ kernel_fpu_end(); - err = skcipher_walk_done(walk, walk->nbytes - nbytes); -+ kernel_fpu_begin(); - } - - return err; --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h -@@ -23,6 +23,7 @@ extern void kernel_fpu_begin(void); +@@ -28,6 +28,7 @@ extern void kernel_fpu_begin_mask(unsign extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +extern void kernel_fpu_resched(void); - /* - * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + /* Code that is unaware of kernel_fpu_begin_mask() can use this */ + static inline void kernel_fpu_begin(void) --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c -@@ -113,6 +113,18 @@ void kernel_fpu_end(void) +@@ -159,6 +159,18 @@ void kernel_fpu_end(void) } EXPORT_SYMBOL_GPL(kernel_fpu_end); diff --git a/kernel/patches-5.4.x-rt/0208-crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch b/kernel/patches-5.11.x-rt/0163-crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0208-crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch rename to kernel/patches-5.11.x-rt/0163-crypto-cryptd-add-a-lock-instead-preempt_disable-loc.patch diff --git a/kernel/patches-5.4.x-rt/0209-panic-disable-random-on-rt.patch b/kernel/patches-5.11.x-rt/0164-panic-disable-random-on-rt.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0209-panic-disable-random-on-rt.patch rename to kernel/patches-5.11.x-rt/0164-panic-disable-random-on-rt.patch index c77f92300..c93df5d15 100644 --- a/kernel/patches-5.4.x-rt/0209-panic-disable-random-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0164-panic-disable-random-on-rt.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -521,9 +521,11 @@ static u64 oops_id; +@@ -544,9 +544,11 @@ static u64 oops_id; static int init_oops_id(void) { diff --git a/kernel/patches-5.4.x-rt/0210-x86-stackprot-no-random-on-rt.patch b/kernel/patches-5.11.x-rt/0165-x86-stackprot-no-random-on-rt.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0210-x86-stackprot-no-random-on-rt.patch rename to kernel/patches-5.11.x-rt/0165-x86-stackprot-no-random-on-rt.patch index 32419846d..64b7cd562 100644 --- a/kernel/patches-5.4.x-rt/0210-x86-stackprot-no-random-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0165-x86-stackprot-no-random-on-rt.patch @@ -19,16 +19,16 @@ Signed-off-by: Thomas Gleixner --- 
a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h -@@ -60,7 +60,7 @@ +@@ -65,7 +65,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; -+ u64 uninitialized_var(canary); ++ u64 canary = 0; u64 tsc; #ifdef CONFIG_X86_64 -@@ -71,8 +71,14 @@ static __always_inline void boot_init_st +@@ -76,8 +76,14 @@ static __always_inline void boot_init_st * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. diff --git a/kernel/patches-5.4.x-rt/0211-random-make-it-work-on-rt.patch b/kernel/patches-5.11.x-rt/0166-random-make-it-work-on-rt.patch similarity index 84% rename from kernel/patches-5.4.x-rt/0211-random-make-it-work-on-rt.patch rename to kernel/patches-5.11.x-rt/0166-random-make-it-work-on-rt.patch index 0984d7502..7f6e9b93a 100644 --- a/kernel/patches-5.4.x-rt/0211-random-make-it-work-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0166-random-make-it-work-on-rt.patch @@ -22,13 +22,12 @@ Signed-off-by: Thomas Gleixner --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c -@@ -77,12 +77,13 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); - __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) +@@ -80,11 +80,12 @@ EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); + DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0) { struct pt_regs *old_regs = set_irq_regs(regs); + u64 ip = regs ? instruction_pointer(regs) : 0; - entering_irq(); inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); @@ -36,10 +35,10 @@ Signed-off-by: Thomas Gleixner + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0, ip); ack_APIC_irq(); - exiting_irq(); + set_irq_regs(old_regs); --- a/drivers/char/random.c +++ b/drivers/char/random.c -@@ -1305,28 +1305,27 @@ static __u32 get_reg(struct fast_pool *f +@@ -1252,28 +1252,27 @@ static __u32 get_reg(struct fast_pool *f return *ptr; } @@ -91,9 +90,9 @@ Signed-off-by: Thomas Gleixner #include +#include - #include #include -@@ -1199,6 +1200,8 @@ static void vmbus_isr(void) + #include +@@ -1310,6 +1311,8 @@ static void vmbus_isr(void) void *page_addr = hv_cpu->synic_event_page; struct hv_message *msg; union hv_synic_event_flags *event; @@ -102,18 +101,18 @@ Signed-off-by: Thomas Gleixner bool handled = false; if (unlikely(page_addr == NULL)) -@@ -1243,7 +1246,7 @@ static void vmbus_isr(void) +@@ -1354,7 +1357,7 @@ static void vmbus_isr(void) tasklet_schedule(&hv_cpu->msg_dpc); } -- add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); -+ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); +- add_interrupt_randomness(hv_get_vector(), 0); ++ add_interrupt_randomness(hv_get_vector(), 0, ip); } /* --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h -@@ -72,6 +72,7 @@ struct irq_desc { +@@ -68,6 +68,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; @@ -123,7 +122,7 @@ Signed-off-by: Thomas Gleixner const struct cpumask *percpu_affinity; --- a/include/linux/random.h +++ b/include/linux/random.h -@@ -33,7 +33,7 @@ static inline void add_latent_entropy(vo +@@ -35,7 +35,7 @@ static inline void add_latent_entropy(vo extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; @@ -134,7 +133,7 @@ Signed-off-by: Thomas Gleixner extern int wait_for_random_bytes(void); --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c -@@ -185,10 +185,16 @@ irqreturn_t 
handle_irq_event_percpu(stru +@@ -192,10 +192,16 @@ irqreturn_t handle_irq_event_percpu(stru { irqreturn_t retval; unsigned int flags = 0; @@ -154,16 +153,16 @@ Signed-off-by: Thomas Gleixner note_interrupt(desc, retval); --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -1085,6 +1085,12 @@ static int irq_thread(void *data) +@@ -1247,6 +1247,12 @@ static int irq_thread(void *data) if (action_ret == IRQ_WAKE_THREAD) irq_wake_secondary(desc, action); -+#ifdef CONFIG_PREEMPT_RT -+ migrate_disable(); -+ add_interrupt_randomness(action->irq, 0, ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) { ++ migrate_disable(); ++ add_interrupt_randomness(action->irq, 0, + desc->random_ip ^ (unsigned long) action); -+ migrate_enable(); -+#endif ++ migrate_enable(); ++ } wake_threads_waitq(desc); } diff --git a/kernel/patches-5.4.x-rt/0212-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch b/kernel/patches-5.11.x-rt/0167-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch similarity index 85% rename from kernel/patches-5.4.x-rt/0212-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch rename to kernel/patches-5.11.x-rt/0167-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch index 59dadde7f..bd18c9fe4 100644 --- a/kernel/patches-5.4.x-rt/0212-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch +++ b/kernel/patches-5.11.x-rt/0167-upstream-net-rt-remove-preemption-disabling-in-netif_rx.patch @@ -13,11 +13,6 @@ Date: Thu, 17 May 2012 09:35:11 +0530 kfree_skb() is called. But in RT, kfree_skb() might gets scheduled out, so it expects non atomic context. -3)When CONFIG_PREEMPT_RT is not defined, - migrate_enable(), migrate_disable() maps to - preempt_enable() and preempt_disable(), so no - change in functionality in case of non-RT. - -Replace preempt_enable(), preempt_disable() with migrate_enable(), migrate_disable() respectively -Replace get_cpu(), put_cpu() with get_cpu_light(), @@ -29,6 +24,8 @@ Cc: Link: http://lkml.kernel.org/r/1337227511-2271-1-git-send-email-Priyanka.Jain@freescale.com Signed-off-by: Thomas Gleixner +[bigeasy: Remove assumption about migrate_disable() from the description.] 
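The netif_rx conversion that follows needs only CPU stability, not atomicity: the flow hash and backlog selection must happen on one CPU, while the backlog queue itself is protected by its own lock. That is exactly what migrate_disable() provides on RT. Sketched with hypothetical names:

static int demo_enqueue_backlog(struct sk_buff *skb, int cpu);
                                /* hypothetical; takes its own queue lock */

static int demo_rx(struct sk_buff *skb)
{
        int cpu, ret;

        migrate_disable();              /* was preempt_disable() */
        cpu = smp_processor_id();       /* stable while migration is off */
        ret = demo_enqueue_backlog(skb, cpu);
        migrate_enable();               /* was preempt_enable() */

        return ret;
}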
+Signed-off-by: Sebastian Andrzej Siewior --- Testing: Tested successfully on p4080ds(8-core SMP system) @@ -37,7 +34,7 @@ Signed-off-by: Thomas Gleixner --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -4419,7 +4419,7 @@ static int netif_rx_internal(struct sk_b +@@ -4792,7 +4792,7 @@ static int netif_rx_internal(struct sk_b struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -46,7 +43,7 @@ Signed-off-by: Thomas Gleixner rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -4429,14 +4429,14 @@ static int netif_rx_internal(struct sk_b +@@ -4802,14 +4802,14 @@ static int netif_rx_internal(struct sk_b ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); diff --git a/kernel/patches-5.4.x-rt/0213-lockdep-no-softirq-accounting-on-rt.patch b/kernel/patches-5.11.x-rt/0168-lockdep-no-softirq-accounting-on-rt.patch similarity index 71% rename from kernel/patches-5.4.x-rt/0213-lockdep-no-softirq-accounting-on-rt.patch rename to kernel/patches-5.11.x-rt/0168-lockdep-no-softirq-accounting-on-rt.patch index 701e30cf0..5110ef742 100644 --- a/kernel/patches-5.4.x-rt/0213-lockdep-no-softirq-accounting-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0168-lockdep-no-softirq-accounting-on-rt.patch @@ -12,9 +12,9 @@ Signed-off-by: Thomas Gleixner --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h -@@ -43,14 +43,6 @@ do { \ +@@ -71,14 +71,6 @@ do { \ do { \ - current->hardirq_context--; \ + __this_cpu_dec(hardirq_context); \ } while (0) -# define lockdep_softirq_enter() \ -do { \ @@ -24,14 +24,13 @@ Signed-off-by: Thomas Gleixner -do { \ - current->softirq_context--; \ -} while (0) - #else - # define trace_hardirqs_on() do { } while (0) - # define trace_hardirqs_off() do { } while (0) -@@ -63,6 +55,21 @@ do { \ - # define lockdep_softirq_enter() do { } while (0) - # define lockdep_softirq_exit() do { } while (0) + + # define lockdep_hrtimer_enter(__hrtimer) \ + ({ \ +@@ -140,6 +132,21 @@ do { \ + # define lockdep_irq_work_exit(__work) do { } while (0) #endif -+ + +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT) +# define lockdep_softirq_enter() \ +do { \ @@ -43,15 +42,16 @@ Signed-off-by: Thomas Gleixner +} while (0) + +#else -+# define lockdep_softirq_enter() do { } while (0) -+# define lockdep_softirq_exit() do { } while (0) ++# define lockdep_softirq_enter() do { } while (0) ++# define lockdep_softirq_exit() do { } while (0) +#endif - ++ #if defined(CONFIG_IRQSOFF_TRACER) || \ defined(CONFIG_PREEMPT_TRACER) + extern void stop_critical_timings(void); --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -4409,6 +4409,7 @@ static void check_flags(unsigned long fl +@@ -5291,6 +5291,7 @@ static noinstr void check_flags(unsigned } } @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner /* * We dont accurately track softirq state in e.g. 
* hardirq contexts (such as on 4KSTACKS), so only -@@ -4423,6 +4424,7 @@ static void check_flags(unsigned long fl +@@ -5305,6 +5306,7 @@ static noinstr void check_flags(unsigned DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } diff --git a/kernel/patches-5.4.x-rt/0214-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch b/kernel/patches-5.11.x-rt/0169-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch similarity index 88% rename from kernel/patches-5.4.x-rt/0214-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch rename to kernel/patches-5.11.x-rt/0169-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch index 5e5714f35..68a6799ad 100644 --- a/kernel/patches-5.4.x-rt/0214-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch +++ b/kernel/patches-5.11.x-rt/0169-lockdep-selftest-only-do-hardirq-context-test-for-raw-spinlock.patch @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -2058,6 +2058,7 @@ void locking_selftest(void) +@@ -2517,6 +2517,7 @@ void locking_selftest(void) printk(" --------------------------------------------------------------------------\n"); @@ -25,10 +25,10 @@ Signed-off-by: Thomas Gleixner /* * irq-context testcases: */ -@@ -2070,6 +2071,28 @@ void locking_selftest(void) +@@ -2531,6 +2532,28 @@ void locking_selftest(void) + DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2); + DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3); - DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); - // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); +#else + /* On -rt, we only do hardirq context test for raw spinlock */ + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); @@ -51,6 +51,6 @@ Signed-off-by: Thomas Gleixner + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); +#endif - ww_tests(); + force_read_lock_recursive = 0; diff --git a/kernel/patches-5.4.x-rt/0215-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch b/kernel/patches-5.11.x-rt/0170-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch similarity index 80% rename from kernel/patches-5.4.x-rt/0215-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch rename to kernel/patches-5.11.x-rt/0170-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch index be7cb9253..9ab9505cd 100644 --- a/kernel/patches-5.4.x-rt/0215-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch +++ b/kernel/patches-5.11.x-rt/0170-lockdep-selftest-fix-warnings-due-to-missing-PREEMPT.patch @@ -23,12 +23,12 @@ Signed-off-by: Xander Huff Acked-by: Gratian Crisan Signed-off-by: Sebastian Andrzej Siewior --- - lib/locking-selftest.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) + lib/locking-selftest.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c -@@ -742,6 +742,8 @@ GENERATE_TESTCASE(init_held_rtmutex); +@@ -787,6 +787,8 @@ GENERATE_TESTCASE(init_held_rtmutex); #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) @@ -37,7 +37,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) -@@ -757,9 +759,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ +@@ -802,9 +804,12 @@ 
GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) @@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Enabling hardirqs with a softirq-safe lock held: */ -@@ -792,6 +797,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -837,6 +842,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #undef E1 #undef E2 @@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Enabling irqs with an irq-safe lock held: */ -@@ -815,6 +822,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A +@@ -860,6 +867,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) -@@ -830,6 +839,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -875,6 +884,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) @@ -77,7 +77,7 @@ Signed-off-by: Sebastian Andrzej Siewior #undef E1 #undef E2 -@@ -861,6 +872,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B +@@ -906,6 +917,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) @@ -86,7 +86,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) -@@ -876,6 +889,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -921,6 +934,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) @@ -95,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior #undef E1 #undef E2 #undef E3 -@@ -909,6 +924,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ +@@ -954,6 +969,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_ #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) -@@ -924,10 +941,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ +@@ -969,10 +986,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_ #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) @@ -119,10 +119,11 @@ Signed-off-by: Sebastian Andrzej Siewior /* * read-lock / write-lock irq inversion. * -@@ -990,6 +1011,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inver +@@ -1162,6 +1183,11 @@ GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3 + #undef E1 #undef E2 #undef E3 - ++ +#endif + +#ifndef CONFIG_PREEMPT_RT @@ -130,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * read-lock / write-lock recursion that is actually safe. 
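The CONFIG_PREEMPT_RT guards added throughout this selftest reflect that spinlock_t and rwlock_t become sleeping rt_mutex-based locks on RT, so only raw_spinlock_t may legally be taken from hard interrupt context and only those test permutations stay meaningful. A hedged sketch of the distinction; the lock and handler names are illustrative, not taken from the selftest:

```c
#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(hard_lock);	/* spins with IRQs off, even on RT */
static DEFINE_SPINLOCK(soft_lock);	/* a sleeping rt_mutex under PREEMPT_RT */

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&hard_lock, flags);
	/*
	 * spin_lock(&soft_lock) would be a bug here on PREEMPT_RT: it may
	 * sleep, and sleeping in hard interrupt context is fatal. This is
	 * why the RT branch keeps only the *_hard_spin test cases.
	 */
	raw_spin_unlock_irqrestore(&hard_lock, flags);

	return IRQ_HANDLED;
}
```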
*/ -@@ -1028,6 +1053,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ +@@ -1208,6 +1234,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ #undef E2 #undef E3 diff --git a/kernel/patches-5.4.x-rt/0216-lockdep-disable-self-test.patch b/kernel/patches-5.11.x-rt/0171-lockdep-disable-self-test.patch similarity index 95% rename from kernel/patches-5.4.x-rt/0216-lockdep-disable-self-test.patch rename to kernel/patches-5.11.x-rt/0171-lockdep-disable-self-test.patch index d86b021f2..e190e7b5b 100644 --- a/kernel/patches-5.4.x-rt/0216-lockdep-disable-self-test.patch +++ b/kernel/patches-5.11.x-rt/0171-lockdep-disable-self-test.patch @@ -17,7 +17,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -1237,7 +1237,7 @@ config DEBUG_ATOMIC_SLEEP +@@ -1353,7 +1353,7 @@ config DEBUG_ATOMIC_SLEEP config DEBUG_LOCKING_API_SELFTESTS bool "Locking API boot-time self-tests" diff --git a/kernel/patches-5.4.x-rt/0217-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch b/kernel/patches-5.11.x-rt/0172-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch similarity index 86% rename from kernel/patches-5.4.x-rt/0217-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch rename to kernel/patches-5.11.x-rt/0172-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch index 3056282e3..4de2ffafb 100644 --- a/kernel/patches-5.4.x-rt/0217-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch +++ b/kernel/patches-5.11.x-rt/0172-drmradeoni915_Use_preempt_disableenable_rt()_where_recommended.patch @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c -@@ -983,6 +983,7 @@ bool i915_get_crtc_scanoutpos(struct drm +@@ -878,6 +878,7 @@ static bool i915_get_crtc_scanoutpos(str spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -23,7 +23,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -1034,6 +1035,7 @@ bool i915_get_crtc_scanoutpos(struct drm +@@ -929,6 +930,7 @@ static bool i915_get_crtc_scanoutpos(str *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ @@ -33,7 +33,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1821,6 +1821,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1813,6 +1813,7 @@ int radeon_get_crtc_scanoutpos(struct dr struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -41,7 +41,7 @@ Signed-off-by: Thomas Gleixner /* Get optional system timestamp before query. */ if (stime) -@@ -1913,6 +1914,7 @@ int radeon_get_crtc_scanoutpos(struct dr +@@ -1905,6 +1906,7 @@ int radeon_get_crtc_scanoutpos(struct dr *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. 
*/
diff --git a/kernel/patches-5.11.x-rt/0173-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch b/kernel/patches-5.11.x-rt/0173-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
new file mode 100644
index 000000000..e60ae6c19
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0173-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch
@@ -0,0 +1,73 @@
+From: Mike Galbraith
+Date: Sat, 27 Feb 2016 09:01:42 +0100
+Subject: [PATCH] drm/i915: Don't disable interrupts on PREEMPT_RT during
+ atomic updates
+
+Commit
+ 8d7849db3eab7 ("drm/i915: Make sprite updates atomic")
+
+started disabling interrupts across atomic updates. This breaks on PREEMPT_RT
+because within this section the code attempts to acquire spinlock_t locks which
+are sleeping locks on PREEMPT_RT.
+
+According to the comment the interrupts are disabled to avoid random delays and
+are not required for protection or synchronisation.
+
+Don't disable interrupts on PREEMPT_RT during atomic updates.
+
+[bigeasy: drop local locks, commit message]
+
+Signed-off-by: Mike Galbraith
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/i915/display/intel_sprite.c | 15 ++++++++++-----
+ 1 file changed, 10 insertions(+), 5 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_sprite.c
++++ b/drivers/gpu/drm/i915/display/intel_sprite.c
+@@ -122,7 +122,8 @@ void intel_pipe_update_start(const struc
+ "PSR idle timed out 0x%x, atomic update may fail\n",
+ psr_status);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+
+ crtc->debug.min_vbl = min;
+ crtc->debug.max_vbl = max;
+@@ -147,11 +148,13 @@ void intel_pipe_update_start(const struc
+ break;
+ }
+
+- local_irq_enable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_enable();
+
+ timeout = schedule_timeout(timeout);
+
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+ }
+
+ finish_wait(wq, &wait);
+@@ -184,7 +187,8 @@ void intel_pipe_update_start(const struc
+ return;
+
+ irq_disable:
+- local_irq_disable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_disable();
+ }
+
+ /**
+@@ -233,7 +237,8 @@ void intel_pipe_update_end(struct intel_
+ new_crtc_state->uapi.event = NULL;
+ }
+
+- local_irq_enable();
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ local_irq_enable();
+
+ if (intel_vgpu_active(dev_priv))
+ return;
diff --git a/kernel/patches-5.4.x-rt/0219-drm-i915-disable-tracing-on-RT.patch b/kernel/patches-5.11.x-rt/0174-drm-i915-disable-tracing-on-RT.patch
similarity index 100%
rename from kernel/patches-5.4.x-rt/0219-drm-i915-disable-tracing-on-RT.patch
rename to kernel/patches-5.11.x-rt/0174-drm-i915-disable-tracing-on-RT.patch
diff --git a/kernel/patches-5.4.x-rt/0220-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch b/kernel/patches-5.11.x-rt/0175-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
similarity index 93%
rename from kernel/patches-5.4.x-rt/0220-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
rename to kernel/patches-5.11.x-rt/0175-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
index a5eb1d79d..276d8eca6 100644
--- a/kernel/patches-5.4.x-rt/0220-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
+++ b/kernel/patches-5.11.x-rt/0175-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch
@@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 --- a/drivers/gpu/drm/i915/i915_trace.h
 +++ b/drivers/gpu/drm/i915/i915_trace.h
-@@ -725,7 +725,7 @@ DEFINE_EVENT(i915_request, i915_request_
+@@ -782,7 +782,7 @@
DEFINE_EVENT(i915_request, i915_request_
 TP_ARGS(rq)
 );
diff --git a/kernel/patches-5.11.x-rt/0176-drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch b/kernel/patches-5.11.x-rt/0176-drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch
new file mode 100644
index 000000000..1122f5c6e
--- /dev/null
+++ b/kernel/patches-5.11.x-rt/0176-drm-i915-gt-Only-disable-interrupts-for-the-timeline.patch
@@ -0,0 +1,45 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 7 Jul 2020 12:25:11 +0200
+Subject: [PATCH] drm/i915/gt: Only disable interrupts for the timeline lock on
+ !force-threaded
+
+According to commit
+ d67739268cf0e ("drm/i915/gt: Mark up the nested engine-pm timeline lock as irqsafe")
+
+the interrupts are disabled because the code may be called from an interrupt
+handler and from preemptible context.
+With `force_irqthreads' set the timeline mutex is never observed in IRQ
+context so it is not needed to disable interrupts.
+
+Only disable interrupts if not in `force_irqthreads' mode.
+
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
++++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+@@ -73,9 +73,10 @@ static int __engine_unpark(struct intel_
+
+ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
+ {
+- unsigned long flags;
++ unsigned long flags = 0;
+
+- local_irq_save(flags);
++ if (!force_irqthreads)
++ local_irq_save(flags);
+ mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+
+ return flags;
+@@ -85,7 +86,8 @@ static inline void __timeline_mark_unloc
+ unsigned long flags)
+ {
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
+- local_irq_restore(flags);
++ if (!force_irqthreads)
++ local_irq_restore(flags);
+ }
+
+ #else
diff --git a/kernel/patches-5.4.x-rt/0224-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch b/kernel/patches-5.11.x-rt/0177-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
similarity index 86%
rename from kernel/patches-5.4.x-rt/0224-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
rename to kernel/patches-5.11.x-rt/0177-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
index d8e391f03..a4a8c83b1 100644
--- a/kernel/patches-5.4.x-rt/0224-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
+++ b/kernel/patches-5.11.x-rt/0177-cpuset-Convert-callback_lock-to-raw_spinlock_t.patch
@@ -59,7 +59,7 @@ Signed-off-by: Sebastian Andrzej Siewior
 static struct workqueue_struct *cpuset_migrate_mm_wq;
 
-@@ -1255,7 +1255,7 @@ static int update_parent_subparts_cpumas
+@@ -1280,7 +1280,7 @@ static int update_parent_subparts_cpumas
 * Newly added CPUs will be removed from effective_cpus and
 * newly deleted ones will be added back to effective_cpus.
*/ @@ -68,7 +68,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (adding) { cpumask_or(parent->subparts_cpus, parent->subparts_cpus, tmp->addmask); -@@ -1274,7 +1274,7 @@ static int update_parent_subparts_cpumas +@@ -1299,7 +1299,7 @@ static int update_parent_subparts_cpumas } parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); @@ -77,7 +77,7 @@ Signed-off-by: Sebastian Andrzej Siewior return cmd == partcmd_update; } -@@ -1379,7 +1379,7 @@ static void update_cpumasks_hier(struct +@@ -1404,7 +1404,7 @@ static void update_cpumasks_hier(struct continue; rcu_read_unlock(); @@ -86,7 +86,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpumask_copy(cp->effective_cpus, tmp->new_cpus); if (cp->nr_subparts_cpus && -@@ -1410,7 +1410,7 @@ static void update_cpumasks_hier(struct +@@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct = cpumask_weight(cp->subparts_cpus); } } @@ -95,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior WARN_ON(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); -@@ -1528,7 +1528,7 @@ static int update_cpumask(struct cpuset +@@ -1553,7 +1553,7 @@ static int update_cpumask(struct cpuset return -EINVAL; } @@ -104,7 +104,7 @@ Signed-off-by: Sebastian Andrzej Siewior cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); /* -@@ -1539,7 +1539,7 @@ static int update_cpumask(struct cpuset +@@ -1564,7 +1564,7 @@ static int update_cpumask(struct cpuset cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } @@ -113,7 +113,7 @@ Signed-off-by: Sebastian Andrzej Siewior update_cpumasks_hier(cs, &tmp); -@@ -1733,9 +1733,9 @@ static void update_nodemasks_hier(struct +@@ -1758,9 +1758,9 @@ static void update_nodemasks_hier(struct continue; rcu_read_unlock(); @@ -125,7 +125,7 @@ Signed-off-by: Sebastian Andrzej Siewior WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); -@@ -1803,9 +1803,9 @@ static int update_nodemask(struct cpuset +@@ -1828,9 +1828,9 @@ static int update_nodemask(struct cpuset if (retval < 0) goto done; @@ -137,7 +137,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); -@@ -1896,9 +1896,9 @@ static int update_flag(cpuset_flagbits_t +@@ -1921,9 +1921,9 @@ static int update_flag(cpuset_flagbits_t spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); @@ -149,7 +149,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); -@@ -2407,7 +2407,7 @@ static int cpuset_common_seq_show(struct +@@ -2432,7 +2432,7 @@ static int cpuset_common_seq_show(struct cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; @@ -158,7 +158,7 @@ Signed-off-by: Sebastian Andrzej Siewior switch (type) { case FILE_CPULIST: -@@ -2429,7 +2429,7 @@ static int cpuset_common_seq_show(struct +@@ -2454,7 +2454,7 @@ static int cpuset_common_seq_show(struct ret = -EINVAL; } @@ -167,7 +167,7 @@ Signed-off-by: Sebastian Andrzej Siewior return ret; } -@@ -2742,14 +2742,14 @@ static int cpuset_css_online(struct cgro +@@ -2767,14 +2767,14 @@ static int cpuset_css_online(struct cgro cpuset_inc(); @@ -184,7 +184,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; -@@ -2776,12 +2776,12 @@ static int cpuset_css_online(struct cgro +@@ -2801,12 +2801,12 @@ static int cpuset_css_online(struct cgro } 
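The conversion running through this patch follows a general PREEMPT_RT rule: a lock that must be taken in truly atomic context (nested under another raw lock, or with interrupts hard-disabled) has to be a raw_spinlock_t, and its critical sections must stay short and non-sleeping. A minimal sketch of the resulting shape; the names are illustrative, not from kernel/cgroup/cpuset.c:

```c
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_callback_lock);
static unsigned long demo_state;

static void demo_update_state(unsigned long val)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_callback_lock, flags);
	demo_state = val;	/* short, never sleeps, no nested spinlock_t */
	raw_spin_unlock_irqrestore(&demo_callback_lock, flags);
}

static unsigned long demo_read_state(void)
{
	unsigned long flags, val;

	raw_spin_lock_irqsave(&demo_callback_lock, flags);
	val = demo_state;
	raw_spin_unlock_irqrestore(&demo_callback_lock, flags);

	return val;
}
```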
rcu_read_unlock(); @@ -199,7 +199,7 @@ Signed-off-by: Sebastian Andrzej Siewior out_unlock: percpu_up_write(&cpuset_rwsem); put_online_cpus(); -@@ -2837,7 +2837,7 @@ static void cpuset_css_free(struct cgrou +@@ -2862,7 +2862,7 @@ static void cpuset_css_free(struct cgrou static void cpuset_bind(struct cgroup_subsys_state *root_css) { percpu_down_write(&cpuset_rwsem); @@ -208,7 +208,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); -@@ -2848,7 +2848,7 @@ static void cpuset_bind(struct cgroup_su +@@ -2873,7 +2873,7 @@ static void cpuset_bind(struct cgroup_su top_cpuset.mems_allowed = top_cpuset.effective_mems; } @@ -217,7 +217,7 @@ Signed-off-by: Sebastian Andrzej Siewior percpu_up_write(&cpuset_rwsem); } -@@ -2945,12 +2945,12 @@ hotplug_update_tasks_legacy(struct cpuse +@@ -2970,12 +2970,12 @@ hotplug_update_tasks_legacy(struct cpuse { bool is_empty; @@ -232,7 +232,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, -@@ -2987,10 +2987,10 @@ hotplug_update_tasks(struct cpuset *cs, +@@ -3012,10 +3012,10 @@ hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; @@ -245,7 +245,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (cpus_updated) update_tasks_cpumask(cs); -@@ -3145,7 +3145,7 @@ static void cpuset_hotplug_workfn(struct +@@ -3170,7 +3170,7 @@ static void cpuset_hotplug_workfn(struct /* synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { @@ -254,7 +254,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* -@@ -3165,17 +3165,17 @@ static void cpuset_hotplug_workfn(struct +@@ -3190,17 +3190,17 @@ static void cpuset_hotplug_workfn(struct } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); @@ -275,7 +275,7 @@ Signed-off-by: Sebastian Andrzej Siewior update_tasks_nodemask(&top_cpuset); } -@@ -3276,11 +3276,11 @@ void cpuset_cpus_allowed(struct task_str +@@ -3301,11 +3301,11 @@ void cpuset_cpus_allowed(struct task_str { unsigned long flags; @@ -289,7 +289,7 @@ Signed-off-by: Sebastian Andrzej Siewior } /** -@@ -3341,11 +3341,11 @@ nodemask_t cpuset_mems_allowed(struct ta +@@ -3366,11 +3366,11 @@ nodemask_t cpuset_mems_allowed(struct ta nodemask_t mask; unsigned long flags; @@ -303,7 +303,7 @@ Signed-off-by: Sebastian Andrzej Siewior return mask; } -@@ -3437,14 +3437,14 @@ bool __cpuset_node_allowed(int node, gfp +@@ -3462,14 +3462,14 @@ bool __cpuset_node_allowed(int node, gfp return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ diff --git a/kernel/patches-5.4.x-rt/0248-x86-Enable-RT.patch b/kernel/patches-5.11.x-rt/0178-x86-Enable-RT.patch similarity index 87% rename from kernel/patches-5.4.x-rt/0248-x86-Enable-RT.patch rename to kernel/patches-5.11.x-rt/0178-x86-Enable-RT.patch index a2cc5b650..87f17975b 100644 --- a/kernel/patches-5.4.x-rt/0248-x86-Enable-RT.patch +++ b/kernel/patches-5.11.x-rt/0178-x86-Enable-RT.patch @@ -11,10 +11,10 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -25,6 +25,7 @@ config X86_64 +@@ -27,6 +27,7 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE - select ARCH_SUPPORTS_INT128 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_RT select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY diff --git 
a/kernel/patches-5.4.x-rt/0253-mm-scatterlist-dont-disable-irqs-on-RT.patch b/kernel/patches-5.11.x-rt/0179-mm-scatterlist-dont-disable-irqs-on-RT.patch similarity index 91% rename from kernel/patches-5.4.x-rt/0253-mm-scatterlist-dont-disable-irqs-on-RT.patch rename to kernel/patches-5.11.x-rt/0179-mm-scatterlist-dont-disable-irqs-on-RT.patch index 9747f5123..fd87beb25 100644 --- a/kernel/patches-5.4.x-rt/0253-mm-scatterlist-dont-disable-irqs-on-RT.patch +++ b/kernel/patches-5.11.x-rt/0179-mm-scatterlist-dont-disable-irqs-on-RT.patch @@ -12,7 +12,7 @@ Signed-off-by: Thomas Gleixner --- a/lib/scatterlist.c +++ b/lib/scatterlist.c -@@ -811,7 +811,7 @@ void sg_miter_stop(struct sg_mapping_ite +@@ -892,7 +892,7 @@ void sg_miter_stop(struct sg_mapping_ite flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { diff --git a/kernel/patches-5.4.x-rt/0254-preempt-lazy-support.patch b/kernel/patches-5.11.x-rt/0180-preempt-lazy-support.patch similarity index 62% rename from kernel/patches-5.4.x-rt/0254-preempt-lazy-support.patch rename to kernel/patches-5.11.x-rt/0180-preempt-lazy-support.patch index 4db1c2994..be1071e9d 100644 --- a/kernel/patches-5.4.x-rt/0254-preempt-lazy-support.patch +++ b/kernel/patches-5.11.x-rt/0180-preempt-lazy-support.patch @@ -52,23 +52,23 @@ performance. Signed-off-by: Thomas Gleixner --- - include/linux/preempt.h | 35 +++++++++++++++++- + include/linux/preempt.h | 54 ++++++++++++++++++++++++++-- include/linux/sched.h | 38 +++++++++++++++++++ include/linux/thread_info.h | 12 +++++- - include/linux/trace_events.h | 1 + include/linux/trace_events.h | 5 ++ kernel/Kconfig.preempt | 6 +++ kernel/sched/core.c | 82 +++++++++++++++++++++++++++++++++++++++++-- kernel/sched/fair.c | 16 ++++---- kernel/sched/features.h | 3 + kernel/sched/sched.h | 9 ++++ - kernel/trace/trace.c | 35 ++++++++++-------- - kernel/trace/trace.h | 2 + + kernel/trace/trace.c | 50 ++++++++++++++++---------- + kernel/trace/trace_events.c | 1 kernel/trace/trace_output.c | 14 ++++++- - 12 files changed, 224 insertions(+), 29 deletions(-) + 12 files changed, 254 insertions(+), 36 deletions(-) --- a/include/linux/preempt.h +++ b/include/linux/preempt.h -@@ -177,6 +177,20 @@ extern void preempt_count_sub(int val); +@@ -174,6 +174,20 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) @@ -89,7 +89,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -@@ -185,6 +199,12 @@ do { \ +@@ -182,6 +196,12 @@ do { \ barrier(); \ } while (0) @@ -102,21 +102,26 @@ Signed-off-by: Thomas Gleixner #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ -@@ -247,6 +267,13 @@ do { \ +@@ -219,6 +239,18 @@ do { \ __preempt_schedule(); \ } while (0) ++/* ++ * open code preempt_check_resched() because it is not exported to modules and ++ * used by local_unlock() or bpf_enable_instrumentation(). 
++ */ +#define preempt_lazy_enable() \ +do { \ + dec_preempt_lazy_count(); \ + barrier(); \ -+ preempt_check_resched(); \ ++ if (should_resched(0)) \ ++ __preempt_schedule(); \ +} while (0) + #else /* !CONFIG_PREEMPTION */ #define preempt_enable() \ do { \ -@@ -254,6 +281,12 @@ do { \ +@@ -226,6 +258,12 @@ do { \ preempt_count_dec(); \ } while (0) @@ -129,7 +134,17 @@ Signed-off-by: Thomas Gleixner #define preempt_enable_notrace() \ do { \ barrier(); \ -@@ -320,7 +353,7 @@ do { \ +@@ -267,6 +305,9 @@ do { \ + #define preempt_check_resched_rt() barrier() + #define preemptible() 0 + ++#define preempt_lazy_disable() barrier() ++#define preempt_lazy_enable() barrier() ++ + #endif /* CONFIG_PREEMPT_COUNT */ + + #ifdef MODULE +@@ -285,7 +326,7 @@ do { \ } while (0) #define preempt_fold_need_resched() \ do { \ @@ -138,9 +153,27 @@ Signed-off-by: Thomas Gleixner set_preempt_need_resched(); \ } while (0) +@@ -413,8 +454,15 @@ extern void migrate_enable(void); + + #else + +-static inline void migrate_disable(void) { } +-static inline void migrate_enable(void) { } ++static inline void migrate_disable(void) ++{ ++ preempt_lazy_disable(); ++} ++ ++static inline void migrate_enable(void) ++{ ++ preempt_lazy_enable(); ++} + + #endif /* CONFIG_SMP */ + --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -1791,6 +1791,44 @@ static inline int test_tsk_need_resched( +@@ -1877,6 +1877,44 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -187,16 +220,16 @@ Signed-off-by: Thomas Gleixner if (task->state & (__TASK_STOPPED | __TASK_TRACED)) --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -97,7 +97,17 @@ static inline int test_ti_thread_flag(st - #define test_thread_flag(flag) \ - test_ti_thread_flag(current_thread_info(), flag) +@@ -149,7 +149,17 @@ static inline int test_ti_thread_flag(st + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) + #endif /* !CONFIG_GENERIC_ENTRY */ -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) +#ifdef CONFIG_PREEMPT_LAZY +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ + test_thread_flag(TIF_NEED_RESCHED_LAZY)) +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) -+#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) ++#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY) + +#else +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) @@ -208,14 +241,34 @@ Signed-off-by: Thomas Gleixner static inline int arch_within_stack_frames(const void * const stack, --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h -@@ -64,6 +64,7 @@ struct trace_entry { +@@ -68,6 +68,7 @@ struct trace_entry { + unsigned char preempt_count; int pid; - unsigned short migrate_disable; - unsigned short padding; + unsigned char migrate_disable; + unsigned char preempt_lazy_count; }; #define TRACE_EVENT_TYPE_MAX \ +@@ -155,9 +156,10 @@ static inline void tracing_generic_entry + { + entry->preempt_count = trace_ctx & 0xff; + entry->migrate_disable = (trace_ctx >> 8) & 0xff; ++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; + entry->pid = current->pid; + entry->type = type; +- entry->flags = trace_ctx >> 16; ++ entry->flags = trace_ctx >> 24; + } + + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); +@@ -170,6 +172,7 @@ enum trace_flag_type { + TRACE_FLAG_SOFTIRQ = 0x10, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, + TRACE_FLAG_NMI = 0x40, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, + }; + + #ifdef 
CONFIG_TRACE_IRQFLAGS_SUPPORT --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -1,5 +1,11 @@ @@ -232,7 +285,7 @@ Signed-off-by: Thomas Gleixner default PREEMPT_NONE --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -555,6 +555,48 @@ void resched_curr(struct rq *rq) +@@ -647,6 +647,48 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -281,7 +334,23 @@ Signed-off-by: Thomas Gleixner void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -3003,6 +3045,9 @@ int sched_fork(unsigned long clone_flags +@@ -1778,6 +1820,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ preempt_lazy_disable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -1806,6 +1849,7 @@ void migrate_enable(void) + barrier(); + p->migration_disabled = 0; + this_rq()->nr_pinned--; ++ preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); +@@ -3853,6 +3897,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -291,7 +360,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -4144,6 +4189,7 @@ static void __sched notrace __schedule(b +@@ -5106,6 +5153,7 @@ static void __sched notrace __schedule(b next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -299,7 +368,7 @@ Signed-off-by: Thomas Gleixner clear_preempt_need_resched(); if (likely(prev != next)) { -@@ -4331,6 +4377,30 @@ static void __sched notrace preempt_sche +@@ -5305,6 +5353,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -330,7 +399,7 @@ Signed-off-by: Thomas Gleixner #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -4344,7 +4414,8 @@ asmlinkage __visible void __sched notrac +@@ -5318,7 +5390,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -340,7 +409,7 @@ Signed-off-by: Thomas Gleixner preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -4371,6 +4442,9 @@ asmlinkage __visible void __sched notrac +@@ -5358,6 +5431,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -350,7 +419,7 @@ Signed-off-by: Thomas Gleixner do { /* * Because the function tracer can trace preempt_count_sub() -@@ -6161,7 +6235,9 @@ void init_idle(struct task_struct *idle, +@@ -7194,7 +7270,9 @@ void init_idle(struct task_struct *idle, /* Set the preempt count _outside_ the spinlocks! 
*/ init_idle_preempt_count(idle, cpu); @@ -361,25 +430,9 @@ Signed-off-by: Thomas Gleixner /* * The idle tasks have their own, simple scheduling class: */ -@@ -8111,6 +8187,7 @@ void migrate_disable(void) - - if (++current->migrate_disable == 1) { - this_rq()->nr_pinned++; -+ preempt_lazy_disable(); - #ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(current->pinned_on_cpu >= 0); - current->pinned_on_cpu = smp_processor_id(); -@@ -8192,6 +8269,7 @@ void migrate_enable(void) - } - - out: -+ preempt_lazy_enable(); - preempt_enable(); - } - EXPORT_SYMBOL(migrate_enable); --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4123,7 +4123,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4383,7 +4383,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -388,7 +441,7 @@ Signed-off-by: Thomas Gleixner /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4147,7 +4147,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4407,7 +4407,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -397,7 +450,7 @@ Signed-off-by: Thomas Gleixner } static void -@@ -4290,7 +4290,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4550,7 +4550,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. */ if (queued) { @@ -406,7 +459,7 @@ Signed-off-by: Thomas Gleixner return; } /* -@@ -4415,7 +4415,7 @@ static void __account_cfs_rq_runtime(str +@@ -4687,7 +4687,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -415,7 +468,7 @@ Signed-off-by: Thomas Gleixner } static __always_inline -@@ -5128,7 +5128,7 @@ static void hrtick_start_fair(struct rq +@@ -5431,7 +5431,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (rq->curr == p) @@ -424,7 +477,7 @@ Signed-off-by: Thomas Gleixner return; } hrtick_start(rq, delta); -@@ -6731,7 +6731,7 @@ static void check_preempt_wakeup(struct +@@ -7017,7 +7017,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -433,7 +486,7 @@ Signed-off-by: Thomas Gleixner /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -9998,7 +9998,7 @@ static void task_fork_fair(struct task_s +@@ -10794,7 +10794,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. 
*/ swap(curr->vruntime, se->vruntime); @@ -442,7 +495,7 @@ Signed-off-by: Thomas Gleixner } se->vruntime -= cfs_rq->min_vruntime; -@@ -10022,7 +10022,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -10821,7 +10821,7 @@ prio_changed_fair(struct rq *rq, struct */ if (rq->curr == p) { if (p->prio > oldprio) @@ -465,7 +518,7 @@ Signed-off-by: Thomas Gleixner /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1876,6 +1876,15 @@ extern void reweight_task(struct task_st +@@ -2015,6 +2015,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -483,90 +536,90 @@ Signed-off-by: Thomas Gleixner --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2343,6 +2343,7 @@ tracing_generic_entry_update(struct trac - struct task_struct *tsk = current; +@@ -2605,8 +2605,16 @@ unsigned int tracing_gen_ctx_irq_test(un + trace_flags |= TRACE_FLAG_NEED_RESCHED; + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; +- return (trace_flags << 16) | (pc & 0xff) | +- (migration_disable_value() & 0xff) << 8; ++ ++#ifdef CONFIG_PREEMPT_LAZY ++ if (need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; ++#endif ++ ++ return (pc & 0xff) | ++ (migration_disable_value() & 0xff) << 8 | ++ (preempt_lazy_count() & 0xff) << 16 | ++ (trace_flags << 24); + } - entry->preempt_count = pc & 0xff; -+ entry->preempt_lazy_count = preempt_lazy_count(); - entry->pid = (tsk) ? tsk->pid : 0; - entry->type = type; - entry->flags = -@@ -2354,7 +2355,8 @@ tracing_generic_entry_update(struct trac - ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | - ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | - ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | -- (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | -+ (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | -+ (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | - (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); - - entry->migrate_disable = (tsk) ? 
__migrate_disabled(tsk) & 0xFF : 0; -@@ -3583,15 +3585,17 @@ unsigned long trace_total_entries(struct + struct ring_buffer_event * +@@ -3807,15 +3815,17 @@ unsigned long trace_total_entries(struct static void print_lat_help_header(struct seq_file *m) { -- seq_puts(m, "# _------=> CPU# \n" -- "# / _-----=> irqs-off \n" -- "# | / _----=> need-resched \n" -- "# || / _---=> hardirq/softirq \n" -- "# ||| / _--=> preempt-depth \n" -- "# |||| / _--=> migrate-disable\n" -- "# ||||| / delay \n" -- "# cmd pid |||||| time | caller \n" -- "# \\ / ||||| \\ | / \n"); -+ seq_puts(m, "# _--------=> CPU# \n" -+ "# / _-------=> irqs-off \n" -+ "# | / _------=> need-resched \n" -+ "# || / _-----=> need-resched_lazy \n" -+ "# ||| / _----=> hardirq/softirq \n" -+ "# |||| / _---=> preempt-depth \n" -+ "# ||||| / _--=> preempt-lazy-depth\n" -+ "# |||||| / _-=> migrate-disable \n" -+ "# ||||||| / delay \n" -+ "# cmd pid |||||||| time | caller \n" -+ "# \\ / |||||||| \\ | / \n"); +- seq_puts(m, "# _------=> CPU# \n" +- "# / _-----=> irqs-off \n" +- "# | / _----=> need-resched \n" +- "# || / _---=> hardirq/softirq \n" +- "# ||| / _--=> preempt-depth \n" +- "# |||| / _-=> migrate-disable \n" +- "# ||||| / delay \n" +- "# cmd pid |||||| time | caller \n" +- "# \\ / |||||| \\ | / \n"); ++ seq_puts(m, "# _--------=> CPU# \n" ++ "# / _-------=> irqs-off \n" ++ "# | / _------=> need-resched \n" ++ "# || / _-----=> need-resched-lazy\n" ++ "# ||| / _----=> hardirq/softirq \n" ++ "# |||| / _---=> preempt-depth \n" ++ "# ||||| / _--=> preempt-lazy-depth\n" ++ "# |||||| / _-=> migrate-disable \n" ++ "# ||||||| / delay \n" ++ "# cmd pid |||||||| time | caller \n" ++ "# \\ / |||||||| \\ | / \n"); } - static void print_event_info(struct trace_buffer *buf, struct seq_file *m) -@@ -3627,11 +3631,12 @@ static void print_func_help_header_irq(s + static void print_event_info(struct array_buffer *buf, struct seq_file *m) +@@ -3849,14 +3859,16 @@ static void print_func_help_header_irq(s - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); -- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); -- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); -- seq_printf(m, "# %.*s||| / delay\n", prec, space); -- seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); -- seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); -+ seq_printf(m, "# %.*s| / _----=> need-resched\n", prec, space); -+ seq_printf(m, "# %.*s|| / _---=> hardirq/softirq\n", prec, space); -+ seq_printf(m, "# %.*s||| / _--=> preempt-depth\n", prec, space); -+ seq_printf(m, "# %.*s||||/ delay\n", prec, space); -+ seq_printf(m, "# TASK-PID %.*sCPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); -+ seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); + print_event_info(buf, m); + +- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); +- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); +- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); +- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); +- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); +- seq_printf(m, "# %.*s|||| / delay\n", prec, space); +- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); +- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); ++ seq_printf(m, "# %.*s _-------=> irqs-off\n", prec, space); ++ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); ++ seq_printf(m, "# %.*s| / 
_-----=> need-resched-lazy\n", prec, space); ++ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); ++ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); ++ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); ++ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); ++ seq_printf(m, "# %.*s|||||| / delay\n", prec, space); ++ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); ++ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); } void ---- a/kernel/trace/trace.h -+++ b/kernel/trace/trace.h -@@ -126,6 +126,7 @@ struct kretprobe_trace_entry_head { - * NEED_RESCHED - reschedule is requested - * HARDIRQ - inside an interrupt handler - * SOFTIRQ - inside a softirq handler -+ * NEED_RESCHED_LAZY - lazy reschedule is requested - */ - enum trace_flag_type { - TRACE_FLAG_IRQS_OFF = 0x01, -@@ -135,6 +136,7 @@ enum trace_flag_type { - TRACE_FLAG_SOFTIRQ = 0x10, - TRACE_FLAG_PREEMPT_RESCHED = 0x20, - TRACE_FLAG_NMI = 0x40, -+ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, - }; +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -184,6 +184,7 @@ static int trace_define_common_fields(vo + __common_field(unsigned char, preempt_count); + __common_field(int, pid); + __common_field(unsigned char, migrate_disable); ++ __common_field(unsigned char, preempt_lazy_count); - #define TRACE_BUF_SIZE 1024 + return ret; + } --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c -@@ -426,6 +426,7 @@ int trace_print_lat_fmt(struct trace_seq +@@ -441,6 +441,7 @@ int trace_print_lat_fmt(struct trace_seq { char hardsoft_irq; char need_resched; @@ -574,7 +627,7 @@ Signed-off-by: Thomas Gleixner char irqs_off; int hardirq; int softirq; -@@ -456,6 +457,9 @@ int trace_print_lat_fmt(struct trace_seq +@@ -471,6 +472,9 @@ int trace_print_lat_fmt(struct trace_seq break; } @@ -584,7 +637,7 @@ Signed-off-by: Thomas Gleixner hardsoft_irq = (nmi && hardirq) ? 'Z' : nmi ? 'z' : -@@ -464,14 +468,20 @@ int trace_print_lat_fmt(struct trace_seq +@@ -479,14 +483,20 @@ int trace_print_lat_fmt(struct trace_seq softirq ? 's' : '.' ; diff --git a/kernel/patches-5.11.x-rt/0181-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch b/kernel/patches-5.11.x-rt/0181-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch new file mode 100644 index 000000000..c5871ca20 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0181-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch @@ -0,0 +1,28 @@ +From: Sebastian Andrzej Siewior +Date: Tue, 30 Jun 2020 11:45:14 +0200 +Subject: [PATCH] x86/entry: Use should_resched() in + idtentry_exit_cond_resched() + +The TIF_NEED_RESCHED bit is inlined on x86 into the preemption counter. +By using should_resched(0) instead of need_resched() the same check can +be performed which uses the same variable as 'preempt_count()` which was +issued before. + +Use should_resched(0) instead need_resched(). 
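The one-line change below works because x86 folds the NEED_RESCHED state, inverted, into its per-CPU preemption counter. A simplified model of that encoding; this is illustrative C, not the real implementation in arch/x86/include/asm/preempt.h:

```c
#include <linux/types.h>

/*
 * Model: the top bit mirrors PREEMPT_NEED_RESCHED and is stored inverted,
 * i.e. it is *cleared* while a reschedule is pending.
 */
#define MODEL_NEED_RESCHED_BIT	0x80000000u

static inline bool model_should_resched(u32 raw_preempt_count,
					u32 preempt_offset)
{
	/*
	 * One comparison answers both questions: the word equals the
	 * allowed offset only if the count matches and the inverted
	 * need-resched bit is clear, i.e. a reschedule is requested.
	 */
	return raw_preempt_count == preempt_offset;
}
```

With preempt_offset == 0 this is exactly the should_resched(0) test the patch substitutes for need_resched(), saving the separate thread-flag load.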
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + kernel/entry/common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -396,7 +396,7 @@ void irqentry_exit_cond_resched(void) + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); +- if (need_resched()) ++ if (should_resched(0)) + preempt_schedule_irq(); + } + } diff --git a/kernel/patches-5.11.x-rt/0182-x86-preempt-lazy.patch b/kernel/patches-5.11.x-rt/0182-x86-preempt-lazy.patch new file mode 100644 index 000000000..eeca917c8 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0182-x86-preempt-lazy.patch @@ -0,0 +1,141 @@ +Subject: x86: Support for lazy preemption +From: Thomas Gleixner +Date: Thu, 01 Nov 2012 11:03:47 +0100 + +Implement the x86 pieces for lazy preempt. + +Signed-off-by: Thomas Gleixner +--- + arch/x86/Kconfig | 1 + + arch/x86/include/asm/preempt.h | 33 ++++++++++++++++++++++++++++++++- + arch/x86/include/asm/thread_info.h | 7 +++++++ + include/linux/entry-common.h | 2 +- + kernel/entry/common.c | 2 +- + 5 files changed, 42 insertions(+), 3 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -216,6 +216,7 @@ config X86 + select HAVE_PCI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_REGS_AND_STACK_ACCESS_API +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -89,17 +89,48 @@ static __always_inline void __preempt_co + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ +-static __always_inline bool __preempt_count_dec_and_test(void) ++static __always_inline bool ____preempt_count_dec_and_test(void) + { + return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + } + ++static __always_inline bool __preempt_count_dec_and_test(void) ++{ ++ if (____preempt_count_dec_and_test()) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if (preempt_count()) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif ++} ++ + /* + * Returns true when we need to resched and can (barring IRQ state). + */ + static __always_inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u32 tmp; ++ tmp = raw_cpu_read_4(__preempt_count); ++ if (tmp == preempt_offset) ++ return true; ++ ++ /* preempt count == 0 ? 
*/ ++ tmp &= ~PREEMPT_NEED_RESCHED; ++ if (tmp != preempt_offset) ++ return false; ++ /* XXX PREEMPT_LOCK_OFFSET */ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); ++#endif + } + + #ifdef CONFIG_PREEMPTION +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -57,11 +57,14 @@ struct thread_info { + unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + u32 status; /* thread synchronous flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable ++ <0 => BUG */ + }; + + #define INIT_THREAD_INFO(tsk) \ + { \ + .flags = 0, \ ++ .preempt_lazy_count = 0, \ + } + + #else /* !__ASSEMBLY__ */ +@@ -90,6 +93,7 @@ struct thread_info { + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ + #define TIF_SLD 18 /* Restore split lock detection on context switch */ ++#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ + #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ + #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ + #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +@@ -113,6 +117,7 @@ struct thread_info { + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) + #define _TIF_SLD (1 << TIF_SLD) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) + #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +@@ -143,6 +148,8 @@ struct thread_info { + + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #define STACK_WARN (THREAD_SIZE/8) + + /* +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -58,7 +58,7 @@ + + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ +- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ++ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + ARCH_EXIT_TO_USER_MODE_WORK) + + /** +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -158,7 +158,7 @@ static unsigned long exit_to_user_mode_l + + local_irq_enable_exit_to_user(ti_work); + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & _TIF_NEED_RESCHED_MASK) + schedule(); + + #ifdef ARCH_RT_DELAYS_SIGNAL_SEND diff --git a/kernel/patches-5.4.x-rt/0256-arm-preempt-lazy-support.patch b/kernel/patches-5.11.x-rt/0183-arm-preempt-lazy-support.patch similarity index 58% rename from kernel/patches-5.4.x-rt/0256-arm-preempt-lazy-support.patch rename to kernel/patches-5.11.x-rt/0183-arm-preempt-lazy-support.patch index 17ca580ad..5b45d1d37 100644 --- a/kernel/patches-5.4.x-rt/0256-arm-preempt-lazy-support.patch +++ b/kernel/patches-5.11.x-rt/0183-arm-preempt-lazy-support.patch @@ -7,26 +7,25 @@ Implement the arm pieces for lazy preempt. 
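The entry-armv.S hunk below encodes the lazy-preemption gate in assembly. An assumed-equivalent C rendering, for orientation only; it relies on the _TIF_NEED_RESCHED_LAZY flag and the per-thread lazy count that this series introduces:

```c
#include <linux/thread_info.h>

static inline bool demo_irq_exit_should_preempt(unsigned long ti_flags,
						int preempt_count,
						int preempt_lazy_count)
{
	if (preempt_count)
		return false;			/* hard preemption held off */
	if (ti_flags & _TIF_NEED_RESCHED)
		return true;			/* full resched: always honoured */
	if (!(ti_flags & _TIF_NEED_RESCHED_LAZY))
		return false;
	return preempt_lazy_count == 0;		/* lazy resched only when not pinned */
}
```

This mirrors the assembly: a non-zero preempt count skips everything, TIF_NEED_RESCHED always preempts, and the lazy flag only preempts when the lazy count is also zero.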
Signed-off-by: Thomas Gleixner --- arch/arm/Kconfig | 1 + - arch/arm/include/asm/thread_info.h | 8 ++++++-- + arch/arm/include/asm/thread_info.h | 6 +++++- arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++--- - arch/arm/kernel/entry-common.S | 9 +++++++-- arch/arm/kernel/signal.c | 3 ++- - 6 files changed, 33 insertions(+), 8 deletions(-) + 5 files changed, 25 insertions(+), 5 deletions(-) --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -103,6 +103,7 @@ config ARM +@@ -107,6 +107,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY - select HAVE_RCU_TABLE_FREE if SMP && ARM_LPAE + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h -@@ -46,6 +46,7 @@ struct cpu_context_save { +@@ -54,6 +54,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ @@ -34,37 +33,35 @@ Signed-off-by: Thomas Gleixner mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ -@@ -139,7 +140,8 @@ extern int vfp_restore_user_hwstate(stru - #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ - #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ +@@ -146,6 +147,7 @@ extern int vfp_restore_user_hwstate(stru #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ --#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ -+#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ -+#define TIF_NEED_RESCHED_LAZY 7 + #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ + #define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 9 - #define TIF_NOHZ 12 /* in adaptive nohz mode */ #define TIF_USING_IWMMXT 17 -@@ -149,6 +151,7 @@ extern int vfp_restore_user_hwstate(stru - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) + #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_UPROBE (1 << TIF_UPROBE) - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -164,7 +167,8 @@ extern int vfp_restore_user_hwstate(stru + #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) + + /* Checks for any syscall work in entry-common.S */ +@@ -169,7 +172,8 @@ extern int vfp_restore_user_hwstate(stru + /* * Change these and you break ASM code in entry-common.S */ - #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ -- _TIF_NOTIFY_RESUME | _TIF_UPROBE) -+ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ -+ _TIF_NEED_RESCHED_LAZY) +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) - #endif /* __KERNEL__ */ - #endif /* __ASM_ARM_THREAD_INFO_H */ --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c -@@ -53,6 +53,7 @@ int main(void) +@@ -42,6 
+42,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -74,7 +71,7 @@ Signed-off-by: Thomas Gleixner DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S -@@ -213,11 +213,18 @@ ENDPROC(__dabt_svc) +@@ -206,11 +206,18 @@ ENDPROC(__dabt_svc) #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -95,7 +92,7 @@ Signed-off-by: Thomas Gleixner #endif svc_exit r5, irq = 1 @ return from exception -@@ -232,8 +239,14 @@ ENDPROC(__irq_svc) +@@ -225,8 +232,14 @@ ENDPROC(__irq_svc) 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED @@ -111,32 +108,6 @@ Signed-off-by: Thomas Gleixner #endif __und_fault: ---- a/arch/arm/kernel/entry-common.S -+++ b/arch/arm/kernel/entry-common.S -@@ -53,7 +53,9 @@ saved_pc .req lr - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing -- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK -+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) -+ bne fast_work_pending -+ tst r1, #_TIF_SECCOMP - bne fast_work_pending - - -@@ -90,8 +92,11 @@ ENDPROC(ret_fast_syscall) - cmp r2, #TASK_SIZE - blne addr_limit_check_failed - ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing -- tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK -+ tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) -+ bne do_slower_path -+ tst r1, #_TIF_SECCOMP - beq no_work_pending -+do_slower_path: - UNWIND(.fnend ) - ENDPROC(ret_fast_syscall) - --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -649,7 +649,8 @@ do_work_pending(struct pt_regs *regs, un diff --git a/kernel/patches-5.4.x-rt/0257-powerpc-preempt-lazy-support.patch b/kernel/patches-5.11.x-rt/0184-powerpc-preempt-lazy-support.patch similarity index 66% rename from kernel/patches-5.4.x-rt/0257-powerpc-preempt-lazy-support.patch rename to kernel/patches-5.11.x-rt/0184-powerpc-preempt-lazy-support.patch index 3f76bcdcc..e1355d1a3 100644 --- a/kernel/patches-5.4.x-rt/0257-powerpc-preempt-lazy-support.patch +++ b/kernel/patches-5.11.x-rt/0184-powerpc-preempt-lazy-support.patch @@ -7,25 +7,26 @@ Implement the powerpc pieces for lazy preempt. 
Signed-off-by: Thomas Gleixner --- arch/powerpc/Kconfig | 1 + - arch/powerpc/include/asm/thread_info.h | 16 ++++++++++++---- + arch/powerpc/include/asm/thread_info.h | 15 ++++++++++++--- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_32.S | 23 ++++++++++++++++------- - arch/powerpc/kernel/entry_64.S | 24 +++++++++++++++++------- - 5 files changed, 47 insertions(+), 18 deletions(-) + arch/powerpc/kernel/exceptions-64e.S | 16 ++++++++++++---- + arch/powerpc/kernel/syscall_64.c | 10 +++++++--- + 6 files changed, 49 insertions(+), 17 deletions(-) --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -221,6 +221,7 @@ config PPC +@@ -233,6 +233,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY - select HAVE_RCU_TABLE_FREE - select HAVE_MMU_GATHER_PAGE_SIZE + select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h -@@ -30,6 +30,8 @@ +@@ -48,6 +48,8 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ @@ -34,7 +35,7 @@ Signed-off-by: Thomas Gleixner unsigned long local_flags; /* private flags for thread */ #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; -@@ -80,11 +82,12 @@ void arch_setup_new_exec(void); +@@ -96,11 +98,12 @@ void arch_setup_new_exec(void); #define TIF_SINGLESTEP 8 /* singlestepping active */ #define TIF_NOHZ 9 /* in adaptive nohz mode */ #define TIF_SECCOMP 10 /* secure computing */ @@ -50,7 +51,7 @@ Signed-off-by: Thomas Gleixner #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -@@ -93,6 +96,9 @@ void arch_setup_new_exec(void); +@@ -109,6 +112,9 @@ void arch_setup_new_exec(void); #endif #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -60,20 +61,21 @@ Signed-off-by: Thomas Gleixner /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1< /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c -@@ -167,6 +167,7 @@ int main(void) +@@ -191,6 +191,7 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); OFFSET(TI_PREEMPT, thread_info, preempt_count); @@ -91,8 +93,8 @@ Signed-off-by: Thomas Gleixner OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size); --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S -@@ -401,7 +401,9 @@ - MTMSRD(r10) +@@ -423,7 +423,9 @@ + mtmsr r10 lwz r9,TI_FLAGS(r2) li r8,-MAX_ERRNO - andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) @@ -102,7 +104,7 @@ Signed-off-by: Thomas Gleixner bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont -@@ -516,13 +518,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -540,13 +542,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE b syscall_dotrace_cont syscall_exit_work: @@ -118,22 +120,22 @@ Signed-off-by: Thomas Gleixner bne- 1f lwz r11,_CCR(r1) /* Load CR */ neg r3,r3 -@@ -531,12 +533,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -555,12 +557,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE 1: stw r6,RESULT(r1) /* Save result */ stw r3,GPR3(r1) /* Update return value */ -2: andi. 
r0,r9,(_TIF_PERSYSCALL_MASK) -+2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)@h ++2: andis. r0,r9,(_TIF_PERSYSCALL_MASK)@h beq 4f /* Clear per-syscall TIF flags if any are set. */ - li r11,_TIF_PERSYSCALL_MASK -+ li r11,_TIF_PERSYSCALL_MASK@h ++ lis r11,(_TIF_PERSYSCALL_MASK)@h addi r12,r2,TI_FLAGS 3: lwarx r8,0,r12 andc r8,r8,r11 -@@ -904,7 +906,14 @@ user_exc_return: /* r10 contains MSR_KE +@@ -943,7 +945,14 @@ user_exc_return: /* r10 contains MSR_KE cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED @@ -148,7 +150,7 @@ Signed-off-by: Thomas Gleixner lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore_kuap /* don't schedule if so */ -@@ -1225,7 +1234,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE +@@ -1261,7 +1270,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRE #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ @@ -157,70 +159,32 @@ Signed-off-by: Thomas Gleixner beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ -@@ -1246,7 +1255,7 @@ do_resched: /* r10 contains MSR_KERNEL - SYNC - MTMSRD(r10) /* disable interrupts */ +@@ -1280,7 +1289,7 @@ do_resched: /* r10 contains MSR_KERNEL + LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) + mtmsr r10 /* disable interrupts */ lwz r9,TI_FLAGS(r2) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK bne- do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user ---- a/arch/powerpc/kernel/entry_64.S -+++ b/arch/powerpc/kernel/entry_64.S -@@ -240,7 +240,9 @@ system_call: /* label this so stack tr - - ld r9,TI_FLAGS(r12) - li r11,-MAX_ERRNO -- andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) -+ lis r0,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@h -+ ori r0,r0,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)@l -+ and. r0,r9,r0 - bne- .Lsyscall_exit_work - - andi. r0,r8,MSR_FP -@@ -363,25 +365,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. - If TIF_NOERROR is set, just save r3 as it is. */ - -- andi. r0,r9,_TIF_RESTOREALL -+ andis. r0,r9,_TIF_RESTOREALL@h - beq+ 0f - REST_NVGPRS(r1) - b 2f - 0: cmpld r3,r11 /* r11 is -MAX_ERRNO */ - blt+ 1f -- andi. r0,r9,_TIF_NOERROR -+ andis. r0,r9,_TIF_NOERROR@h - bne- 1f - ld r5,_CCR(r1) - neg r3,r3 - oris r5,r5,0x1000 /* Set SO bit in CR */ - std r5,_CCR(r1) - 1: std r3,GPR3(r1) --2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) -+2: andis. r0,r9,(_TIF_PERSYSCALL_MASK)@h - beq 4f - - /* Clear per-syscall TIF flags if any are set. */ - -- li r11,_TIF_PERSYSCALL_MASK -+ lis r11,(_TIF_PERSYSCALL_MASK)@h - addi r12,r12,TI_FLAGS - 3: ldarx r10,0,r12 - andc r10,r10,r11 -@@ -786,7 +788,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - bl restore_math +--- a/arch/powerpc/kernel/exceptions-64e.S ++++ b/arch/powerpc/kernel/exceptions-64e.S +@@ -1080,7 +1080,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + li r10, -1 + mtspr SPRN_DBSR,r10 b restore - #endif -1: andi. r0,r4,_TIF_NEED_RESCHED +1: andi. 
r0,r4,_TIF_NEED_RESCHED_MASK beq 2f bl restore_interrupts SCHEDULE_USER -@@ -848,10 +850,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +@@ -1132,12 +1132,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + bne- 0b + 1: - #ifdef CONFIG_PREEMPTION +-#ifdef CONFIG_PREEMPT ++#ifdef CONFIG_PREEMPTION /* Check if we need to preempt */ + lwz r8,TI_PREEMPT(r9) + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ @@ -238,3 +202,49 @@ Signed-off-by: Thomas Gleixner cmpwi cr0,r8,0 bne restore ld r0,SOFTE(r1) +@@ -1158,7 +1166,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + * interrupted after loading SRR0/1. + */ + wrteei 0 +-#endif /* CONFIG_PREEMPT */ ++#endif /* CONFIG_PREEMPTION */ + + restore: + /* +--- a/arch/powerpc/kernel/syscall_64.c ++++ b/arch/powerpc/kernel/syscall_64.c +@@ -217,7 +217,7 @@ notrace unsigned long syscall_exit_prepa + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); +- if (ti_flags & _TIF_NEED_RESCHED) { ++ if (ti_flags & _TIF_NEED_RESCHED_MASK) { + schedule(); + } else { + /* +@@ -307,7 +307,7 @@ notrace unsigned long interrupt_exit_use + ti_flags = READ_ONCE(*ti_flagsp); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); /* returning to user: may enable */ +- if (ti_flags & _TIF_NEED_RESCHED) { ++ if (ti_flags & _TIF_NEED_RESCHED_MASK) { + schedule(); + } else { + if (ti_flags & _TIF_SIGPENDING) +@@ -395,11 +395,15 @@ notrace unsigned long interrupt_exit_ker + /* Returning to a kernel context with local irqs enabled. */ + WARN_ON_ONCE(!(regs->msr & MSR_EE)); + again: +- if (IS_ENABLED(CONFIG_PREEMPT)) { ++ if (IS_ENABLED(CONFIG_PREEMPTION)) { + /* Return to preemptible kernel context */ + if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); ++ } else if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED_LAZY)) { ++ if ((preempt_count() == 0) && ++ (current_thread_info()->preempt_lazy_count == 0)) ++ preempt_schedule_irq(); + } + } + diff --git a/kernel/patches-5.4.x-rt/0259-arch-arm64-Add-lazy-preempt-support.patch b/kernel/patches-5.11.x-rt/0185-arch-arm64-Add-lazy-preempt-support.patch similarity index 74% rename from kernel/patches-5.4.x-rt/0259-arch-arm64-Add-lazy-preempt-support.patch rename to kernel/patches-5.11.x-rt/0185-arch-arm64-Add-lazy-preempt-support.patch index bb262195d..2087084ae 100644 --- a/kernel/patches-5.4.x-rt/0259-arch-arm64-Add-lazy-preempt-support.patch +++ b/kernel/patches-5.11.x-rt/0185-arch-arm64-Add-lazy-preempt-support.patch @@ -13,22 +13,22 @@ Signed-off-by: Anders Roxell --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++- - arch/arm64/include/asm/thread_info.h | 6 +++++- + arch/arm64/include/asm/thread_info.h | 8 +++++++- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 13 +++++++++++-- arch/arm64/kernel/signal.c | 2 +- - 6 files changed, 43 insertions(+), 5 deletions(-) + 6 files changed, 45 insertions(+), 5 deletions(-) --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -159,6 +159,7 @@ config ARM64 - select HAVE_PERF_EVENTS +@@ -177,6 +177,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP -+ select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_PREEMPT_LAZY select HAVE_FUNCTION_ARG_ACCESS_API - select HAVE_RCU_TABLE_FREE + select HAVE_FUTEX_CMPXCHG if FUTEX + select MMU_GATHER_RCU_TABLE_FREE --- a/arch/arm64/include/asm/preempt.h +++ 
b/arch/arm64/include/asm/preempt.h @@ -70,13 +70,36 @@ static inline bool __preempt_count_dec_a @@ -71,7 +71,7 @@ Signed-off-by: Anders Roxell #ifdef CONFIG_PREEMPTION --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h -@@ -29,6 +29,7 @@ struct thread_info { +@@ -26,6 +26,7 @@ struct thread_info { #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif @@ -79,33 +79,35 @@ Signed-off-by: Anders Roxell union { u64 preempt_count; /* 0 => preemptible, <0 => bug */ struct { -@@ -63,6 +64,7 @@ void arch_release_task_struct(struct tas - #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +@@ -65,6 +66,7 @@ void arch_release_task_struct(struct tas #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ - #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ -+#define TIF_NEED_RESCHED_LAZY 6 - #define TIF_NOHZ 7 + #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ + #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 7 #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ -@@ -83,6 +85,7 @@ void arch_release_task_struct(struct tas - #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) - #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) - #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) + #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ +@@ -95,8 +97,10 @@ void arch_release_task_struct(struct tas + #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) - #define _TIF_NOHZ (1 << TIF_NOHZ) - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -96,8 +99,9 @@ void arch_release_task_struct(struct tas - #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ -- _TIF_UPROBE | _TIF_FSCHECK) -+ _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) +@@ -105,6 +109,8 @@ void arch_release_task_struct(struct tas + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) - #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_NOHZ | _TIF_SYSCALL_EMU) ++ + #ifdef CONFIG_SHADOW_CALL_STACK + #define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -30,6 +30,7 @@ int main(void) @@ -113,12 +115,12 @@ Signed-off-by: Anders Roxell DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); - DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); + #endif --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -679,9 +679,18 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKIN +@@ -678,9 +678,18 
@@ alternative_if ARM64_HAS_IRQ_PRIO_MASKIN mrs x0, daif orr x24, x24, x0 alternative_else_nop_endif @@ -138,13 +140,13 @@ Signed-off-by: Anders Roxell +2: #endif - #ifdef CONFIG_ARM64_PSEUDO_NMI + mov x0, sp --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -910,7 +910,7 @@ asmlinkage void do_notify_resume(struct - /* Check valid user FS if needed */ - addr_limit_user_check(); - +@@ -915,7 +915,7 @@ asmlinkage void do_notify_resume(struct + unsigned long thread_flags) + { + do { - if (thread_flags & _TIF_NEED_RESCHED) { + if (thread_flags & _TIF_NEED_RESCHED_MASK) { /* Unmask Debug and SError for the next task */ diff --git a/kernel/patches-5.4.x-rt/0261-jump-label-rt.patch b/kernel/patches-5.11.x-rt/0186-jump-label-rt.patch similarity index 92% rename from kernel/patches-5.4.x-rt/0261-jump-label-rt.patch rename to kernel/patches-5.11.x-rt/0186-jump-label-rt.patch index 7ca171519..7b9d40c8a 100644 --- a/kernel/patches-5.4.x-rt/0261-jump-label-rt.patch +++ b/kernel/patches-5.11.x-rt/0186-jump-label-rt.patch @@ -24,12 +24,12 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -64,7 +64,7 @@ config ARM +@@ -66,7 +66,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_MMAP_RND_BITS if MMU - select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT diff --git a/kernel/patches-5.4.x-rt/0262-leds-trigger-disable-CPU-trigger-on-RT.patch b/kernel/patches-5.11.x-rt/0187-leds-trigger-disable-CPU-trigger-on-RT.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0262-leds-trigger-disable-CPU-trigger-on-RT.patch rename to kernel/patches-5.11.x-rt/0187-leds-trigger-disable-CPU-trigger-on-RT.patch diff --git a/kernel/patches-5.4.x-rt/0263-drivers-tty-fix-omap-lock-crap.patch b/kernel/patches-5.11.x-rt/0188-drivers-tty-fix-omap-lock-crap.patch similarity index 90% rename from kernel/patches-5.4.x-rt/0263-drivers-tty-fix-omap-lock-crap.patch rename to kernel/patches-5.11.x-rt/0188-drivers-tty-fix-omap-lock-crap.patch index f70e9198d..1e7206eb5 100644 --- a/kernel/patches-5.4.x-rt/0263-drivers-tty-fix-omap-lock-crap.patch +++ b/kernel/patches-5.11.x-rt/0188-drivers-tty-fix-omap-lock-crap.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c -@@ -1307,13 +1307,10 @@ serial_omap_console_write(struct console +@@ -1301,13 +1301,10 @@ serial_omap_console_write(struct console pm_runtime_get_sync(up->dev); @@ -30,7 +30,7 @@ Signed-off-by: Thomas Gleixner /* * First save the IER then disable the interrupts -@@ -1342,8 +1339,7 @@ serial_omap_console_write(struct console +@@ -1336,8 +1333,7 @@ serial_omap_console_write(struct console pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) diff --git a/kernel/patches-5.4.x-rt/0264-drivers-tty-pl011-irq-disable-madness.patch b/kernel/patches-5.11.x-rt/0189-drivers-tty-pl011-irq-disable-madness.patch similarity index 75% rename from kernel/patches-5.4.x-rt/0264-drivers-tty-pl011-irq-disable-madness.patch rename to kernel/patches-5.11.x-rt/0189-drivers-tty-pl011-irq-disable-madness.patch index 5cf19ee65..d231b1525 
100644 --- a/kernel/patches-5.4.x-rt/0264-drivers-tty-pl011-irq-disable-madness.patch +++ b/kernel/patches-5.11.x-rt/0189-drivers-tty-pl011-irq-disable-madness.patch @@ -7,12 +7,18 @@ we are looking for. Redo it to make it work on -RT and non-RT. Signed-off-by: Thomas Gleixner --- - drivers/tty/serial/amba-pl011.c | 15 ++++++++++----- - 1 file changed, 10 insertions(+), 5 deletions(-) + drivers/tty/serial/amba-pl011.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2214,13 +2214,19 @@ pl011_console_write(struct console *co, +@@ -2201,18 +2201,24 @@ pl011_console_write(struct console *co, + { + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; +- unsigned long flags; ++ unsigned long flags = 0; + int locked = 1; clk_enable(uap->clk); @@ -35,7 +41,7 @@ Signed-off-by: Thomas Gleixner /* * First save the CR then disable the interrupts -@@ -2246,8 +2252,7 @@ pl011_console_write(struct console *co, +@@ -2238,8 +2244,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/kernel/patches-5.4.x-rt/0267-ARM-enable-irq-in-translation-section-permission-fau.patch b/kernel/patches-5.11.x-rt/0190-ARM-enable-irq-in-translation-section-permission-fau.patch similarity index 96% rename from kernel/patches-5.4.x-rt/0267-ARM-enable-irq-in-translation-section-permission-fau.patch rename to kernel/patches-5.11.x-rt/0190-ARM-enable-irq-in-translation-section-permission-fau.patch index 9e083f041..74a959275 100644 --- a/kernel/patches-5.4.x-rt/0267-ARM-enable-irq-in-translation-section-permission-fau.patch +++ b/kernel/patches-5.11.x-rt/0190-ARM-enable-irq-in-translation-section-permission-fau.patch @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c -@@ -414,6 +414,9 @@ do_translation_fault(unsigned long addr, +@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); @@ -73,7 +73,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (user_mode(regs)) goto bad_area; -@@ -481,6 +484,9 @@ do_translation_fault(unsigned long addr, +@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { diff --git a/kernel/patches-5.4.x-rt/0268-genirq-update-irq_set_irqchip_state-documentation.patch b/kernel/patches-5.11.x-rt/0191-genirq-update-irq_set_irqchip_state-documentation.patch similarity index 93% rename from kernel/patches-5.4.x-rt/0268-genirq-update-irq_set_irqchip_state-documentation.patch rename to kernel/patches-5.11.x-rt/0191-genirq-update-irq_set_irqchip_state-documentation.patch index 160b84c8c..9b9e681b9 100644 --- a/kernel/patches-5.4.x-rt/0268-genirq-update-irq_set_irqchip_state-documentation.patch +++ b/kernel/patches-5.11.x-rt/0191-genirq-update-irq_set_irqchip_state-documentation.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c -@@ -2673,7 +2673,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) +@@ -2787,7 +2787,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state) * This call sets the internal irqchip state of an interrupt, * depending on the value of @which. 
* diff --git a/kernel/patches-5.4.x-rt/0269-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch b/kernel/patches-5.11.x-rt/0192-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch similarity index 76% rename from kernel/patches-5.4.x-rt/0269-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch rename to kernel/patches-5.11.x-rt/0192-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch index 818944745..3f22ed268 100644 --- a/kernel/patches-5.4.x-rt/0269-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch +++ b/kernel/patches-5.11.x-rt/0192-KVM-arm-arm64-downgrade-preempt_disable-d-region-to-.patch @@ -17,12 +17,12 @@ Reported-by: Manish Jaggi Signed-off-by: Josh Cartwright Signed-off-by: Sebastian Andrzej Siewior --- - virt/kvm/arm/arm.c | 6 +++--- + arch/arm64/kvm/arm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---- a/virt/kvm/arm/arm.c -+++ b/virt/kvm/arm/arm.c -@@ -700,7 +700,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +--- a/arch/arm64/kvm/arm.c ++++ b/arch/arm64/kvm/arm.c +@@ -732,7 +732,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v * involves poking the GIC, which must be done in a * non-preemptible context. */ @@ -31,8 +31,8 @@ Signed-off-by: Sebastian Andrzej Siewior kvm_pmu_flush_hwstate(vcpu); -@@ -749,7 +749,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v - kvm_timer_sync_hwstate(vcpu); +@@ -781,7 +781,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v + kvm_timer_sync_user(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); - preempt_enable(); @@ -40,12 +40,12 @@ Signed-off-by: Sebastian Andrzej Siewior continue; } -@@ -827,7 +827,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v +@@ -853,7 +853,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_v /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, run, ret); + handle_exit_early(vcpu, ret); - preempt_enable(); + migrate_enable(); - ret = handle_exit(vcpu, run, ret); - } + /* + * The ARMv8 architecture doesn't give the hypervisor diff --git a/kernel/patches-5.4.x-rt/0270-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch b/kernel/patches-5.11.x-rt/0193-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch similarity index 85% rename from kernel/patches-5.4.x-rt/0270-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch rename to kernel/patches-5.11.x-rt/0193-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch index 490171f65..4078b8dba 100644 --- a/kernel/patches-5.4.x-rt/0270-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch +++ b/kernel/patches-5.11.x-rt/0193-arm64-fpsimd-use-preemp_disable-in-addition-to-local.patch @@ -14,7 +14,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c -@@ -213,6 +213,16 @@ static void sve_free(struct task_struct +@@ -226,6 +226,16 @@ static void sve_free(struct task_struct __sve_free(task); } @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored -@@ -1010,6 +1020,7 @@ void fpsimd_thread_switch(struct task_st +@@ -1022,6 +1032,7 @@ void fpsimd_thread_switch(struct task_st void fpsimd_flush_thread(void) { int vl, supported_vl; @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior if (!system_supports_fpsimd()) return; -@@ -1022,7 +1033,7 @@ void fpsimd_flush_thread(void) +@@ -1034,7 +1045,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); @@ 
-48,7 +48,7 @@ Signed-off-by: Sebastian Andrzej Siewior /* * Reset the task vector length as required. -@@ -1056,6 +1067,7 @@ void fpsimd_flush_thread(void) +@@ -1068,6 +1079,7 @@ void fpsimd_flush_thread(void) } put_cpu_fpsimd_context(); diff --git a/kernel/patches-5.4.x-rt/0273-x86-Enable-RT-also-on-32bit.patch b/kernel/patches-5.11.x-rt/0194-x86-Enable-RT-also-on-32bit.patch similarity index 77% rename from kernel/patches-5.4.x-rt/0273-x86-Enable-RT-also-on-32bit.patch rename to kernel/patches-5.11.x-rt/0194-x86-Enable-RT-also-on-32bit.patch index 833583053..54b3af282 100644 --- a/kernel/patches-5.4.x-rt/0273-x86-Enable-RT-also-on-32bit.patch +++ b/kernel/patches-5.11.x-rt/0194-x86-Enable-RT-also-on-32bit.patch @@ -9,18 +9,18 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -25,7 +25,6 @@ config X86_64 +@@ -27,7 +27,6 @@ config X86_64 # Options that are inherently 64-bit kernel only: select ARCH_HAS_GIGANTIC_PAGE - select ARCH_SUPPORTS_INT128 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select ARCH_SUPPORTS_RT select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA -@@ -91,6 +90,7 @@ config X86 - select ARCH_SUPPORTS_ACPI - select ARCH_SUPPORTS_ATOMIC_RMW +@@ -97,6 +96,7 @@ config X86 + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 + select ARCH_SUPPORTS_RT select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS diff --git a/kernel/patches-5.4.x-rt/0274-ARM-Allow-to-enable-RT.patch b/kernel/patches-5.11.x-rt/0195-ARM-Allow-to-enable-RT.patch similarity index 58% rename from kernel/patches-5.4.x-rt/0274-ARM-Allow-to-enable-RT.patch rename to kernel/patches-5.11.x-rt/0195-ARM-Allow-to-enable-RT.patch index 1721a11eb..ea82b69c8 100644 --- a/kernel/patches-5.4.x-rt/0274-ARM-Allow-to-enable-RT.patch +++ b/kernel/patches-5.11.x-rt/0195-ARM-Allow-to-enable-RT.patch @@ -6,16 +6,24 @@ Allow to select RT. Signed-off-by: Sebastian Andrzej Siewior --- - arch/arm/Kconfig | 1 + - 1 file changed, 1 insertion(+) + arch/arm/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig -@@ -32,6 +32,7 @@ config ARM +@@ -30,6 +30,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW -+ select ARCH_SUPPORTS_RT ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU +@@ -123,6 +124,7 @@ config ARM + select OLD_SIGSUSPEND3 + select PCI_SYSCALL if PCI + select PERF_USE_VMALLOC ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select RTC_LIB + select SET_FS + select SYS_SUPPORTS_APM_EMULATION diff --git a/kernel/patches-5.11.x-rt/0196-ARM64-Allow-to-enable-RT.patch b/kernel/patches-5.11.x-rt/0196-ARM64-Allow-to-enable-RT.patch new file mode 100644 index 000000000..17e8ae2a3 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0196-ARM64-Allow-to-enable-RT.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:35 +0200 +Subject: [PATCH] ARM64: Allow to enable RT + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/arm64/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -76,6 +76,7 @@ config ARM64 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) + select ARCH_SUPPORTS_NUMA_BALANCING ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT +@@ -198,6 +199,7 @@ config ARM64 + select PCI_DOMAINS_GENERIC if PCI + select PCI_ECAM if (ACPI && PCI) + select PCI_SYSCALL if PCI ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select POWER_RESET + select POWER_SUPPLY + select SPARSE_IRQ diff --git a/kernel/patches-5.11.x-rt/0197-powerpc-traps.patch b/kernel/patches-5.11.x-rt/0197-powerpc-traps.patch new file mode 100644 index 000000000..07ce6ec56 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0197-powerpc-traps.patch @@ -0,0 +1,32 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 26 Jul 2019 11:30:49 +0200 +Subject: [PATCH] powerpc: traps: Use PREEMPT_RT + +Add PREEMPT_RT to the backtrace if enabled. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/kernel/traps.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -259,12 +259,17 @@ static char *get_mmu_str(void) + + static int __die(const char *str, struct pt_regs *regs, long err) + { ++ const char *pr = ""; ++ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; ++ + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", + PAGE_SIZE / 1024, get_mmu_str(), +- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", ++ pr, + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", diff --git a/kernel/patches-5.4.x-rt/0276-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch b/kernel/patches-5.11.x-rt/0198-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch similarity index 57% rename from kernel/patches-5.4.x-rt/0276-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch rename to kernel/patches-5.11.x-rt/0198-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch index fd3c23b74..470c64020 100644 --- a/kernel/patches-5.4.x-rt/0276-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch +++ b/kernel/patches-5.11.x-rt/0198-powerpc-pseries-iommu-Use-a-locallock-instead-local_ir.patch @@ -12,8 +12,8 @@ Use local_irq_save() instead of local_irq_disable(). 
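The hunks below pair the per-CPU TCE page with a local_lock_t. A minimal, generic sketch of that pattern (struct and function names here are illustrative, not the patch's): on !PREEMPT_RT the lock compiles down to the old IRQ-off section, while on PREEMPT_RT it becomes a real per-CPU lock that may sleep.

#include <linux/local_lock.h>
#include <linux/percpu.h>

struct pcpu_buf {
        void *page;
        local_lock_t lock;
};
static DEFINE_PER_CPU(struct pcpu_buf, pcpu_buf) = {
        .lock = INIT_LOCAL_LOCK(lock),
};

static void use_buf(void)
{
        unsigned long flags;
        void *p;

        /* Protects both the per-CPU pointer and the page behind it. */
        local_lock_irqsave(&pcpu_buf.lock, flags);
        p = __this_cpu_read(pcpu_buf.page);
        /* ... fill p and hand it to the hypervisor call ... */
        local_unlock_irqrestore(&pcpu_buf.lock, flags);
}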
Cc: stable-rt@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior --- - arch/powerpc/platforms/pseries/iommu.c | 17 ++++++++++------- - 1 file changed, 10 insertions(+), 7 deletions(-) + arch/powerpc/platforms/pseries/iommu.c | 31 ++++++++++++++++++++----------- + 1 file changed, 20 insertions(+), 11 deletions(-) --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -21,70 +21,90 @@ Signed-off-by: Sebastian Andrzej Siewior #include #include #include -+#include ++#include #include #include #include -@@ -177,6 +178,7 @@ static int tce_build_pSeriesLP(unsigned +@@ -190,7 +191,13 @@ static int tce_build_pSeriesLP(unsigned + return ret; } - static DEFINE_PER_CPU(__be64 *, tce_page); -+static DEFINE_LOCAL_IRQ_LOCK(tcp_page_lock); +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, -@@ -198,8 +200,8 @@ static int tce_buildmulti_pSeriesLP(stru +@@ -212,9 +219,10 @@ static int tce_buildmulti_pSeriesLP(stru direction, attrs); } - local_irq_save(flags); /* to protect tcep and the page behind it */ -- + /* to protect tcep and the page behind it */ -+ local_lock_irqsave(tcp_page_lock, flags); - tcep = __this_cpu_read(tce_page); ++ local_lock_irqsave(&tce_page.lock, flags); + +- tcep = __this_cpu_read(tce_page); ++ tcep = __this_cpu_read(tce_page.page); /* This is safe to do since interrupts are off when we're called -@@ -209,7 +211,7 @@ static int tce_buildmulti_pSeriesLP(stru + * from iommu_alloc{,_sg}() +@@ -223,12 +231,12 @@ static int tce_buildmulti_pSeriesLP(stru tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { - local_irq_restore(flags); -+ local_unlock_irqrestore(tcp_page_lock, flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); return tce_build_pSeriesLP(tbl->it_index, tcenum, tbl->it_page_shift, npages, uaddr, direction, attrs); -@@ -244,7 +246,7 @@ static int tce_buildmulti_pSeriesLP(stru + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + rpn = __pa(uaddr) >> TCE_SHIFT; +@@ -258,7 +266,7 @@ static int tce_buildmulti_pSeriesLP(stru tcenum += limit; } while (npages > 0 && !rc); - local_irq_restore(flags); -+ local_unlock_irqrestore(tcp_page_lock, flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; -@@ -415,13 +417,14 @@ static int tce_setrange_multi_pSeriesLP( +@@ -429,16 +437,17 @@ static int tce_setrange_multi_pSeriesLP( DMA_BIDIRECTIONAL, 0); } - local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); + /* to protect tcep and the page behind it */ -+ local_lock_irq(tcp_page_lock); - tcep = __this_cpu_read(tce_page); ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); if (!tcep) { tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { - local_irq_enable(); -+ local_unlock_irq(tcp_page_lock); ++ local_unlock_irq(&tce_page.lock); return -ENOMEM; } - __this_cpu_write(tce_page, tcep); -@@ -467,7 +470,7 @@ static int tce_setrange_multi_pSeriesLP( +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -481,7 +490,7 @@ static int tce_setrange_multi_pSeriesLP( /* 
error cleanup: caller will clear whole range */ - local_irq_enable(); -+ local_unlock_irq(tcp_page_lock); ++ local_unlock_irq(&tce_page.lock); return rc; } diff --git a/kernel/patches-5.4.x-rt/0277-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/kernel/patches-5.11.x-rt/0199-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch similarity index 97% rename from kernel/patches-5.4.x-rt/0277-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch rename to kernel/patches-5.11.x-rt/0199-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch index d2aff68e3..bbd087bf3 100644 --- a/kernel/patches-5.4.x-rt/0277-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch +++ b/kernel/patches-5.11.x-rt/0199-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch @@ -27,7 +27,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig -@@ -178,6 +178,7 @@ config KVM_E500MC +@@ -179,6 +179,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 diff --git a/kernel/patches-5.4.x-rt/0279-powerpc-stackprotector-work-around-stack-guard-init-.patch b/kernel/patches-5.11.x-rt/0200-powerpc-stackprotector-work-around-stack-guard-init-.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0279-powerpc-stackprotector-work-around-stack-guard-init-.patch rename to kernel/patches-5.11.x-rt/0200-powerpc-stackprotector-work-around-stack-guard-init-.patch diff --git a/kernel/patches-5.11.x-rt/0201-powerpc-Avoid-recursive-header-includes.patch b/kernel/patches-5.11.x-rt/0201-powerpc-Avoid-recursive-header-includes.patch new file mode 100644 index 000000000..6a048d598 --- /dev/null +++ b/kernel/patches-5.11.x-rt/0201-powerpc-Avoid-recursive-header-includes.patch @@ -0,0 +1,39 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 8 Jan 2021 19:48:21 +0100 +Subject: [PATCH] powerpc: Avoid recursive header includes + +- The include of bug.h leads to an include of printk.h which gets back + to spinlock.h and complains then about missing xchg(). + Remove bug.h and add bits.h which is needed for BITS_PER_BYTE. + +- Avoid the "please don't include this file directly" error from + rwlock-rt. Allow an include from/with rtmutex.h. + +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/include/asm/cmpxchg.h | 2 +- + arch/powerpc/include/asm/simple_spinlock_types.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/include/asm/cmpxchg.h ++++ b/arch/powerpc/include/asm/cmpxchg.h +@@ -5,7 +5,7 @@ + #ifdef __KERNEL__ + #include + #include +-#include ++#include + + #ifdef __BIG_ENDIAN + #define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE) +--- a/arch/powerpc/include/asm/simple_spinlock_types.h ++++ b/arch/powerpc/include/asm/simple_spinlock_types.h +@@ -2,7 +2,7 @@ + #ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + #define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H + +-#ifndef __LINUX_SPINLOCK_TYPES_H ++#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H) + # error "please don't include this file directly" + #endif + diff --git a/kernel/patches-5.11.x-rt/0202-POWERPC-Allow-to-enable-RT.patch b/kernel/patches-5.11.x-rt/0202-POWERPC-Allow-to-enable-RT.patch new file mode 100644 index 000000000..4a4abc82a --- /dev/null +++ b/kernel/patches-5.11.x-rt/0202-POWERPC-Allow-to-enable-RT.patch @@ -0,0 +1,29 @@ +From: Sebastian Andrzej Siewior +Date: Fri, 11 Oct 2019 13:14:41 +0200 +Subject: [PATCH] POWERPC: Allow to enable RT + +Allow to select RT. 
+ +Signed-off-by: Sebastian Andrzej Siewior +--- + arch/powerpc/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -147,6 +147,7 @@ config PPC + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC32 || PPC_BOOK3S_64 ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS +@@ -241,6 +242,7 @@ config PPC + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_IRQ_TIME_ACCOUNTING ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select IOMMU_HELPER if PPC64 + select IRQ_DOMAIN diff --git a/kernel/patches-5.4.x-rt/0283-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch b/kernel/patches-5.11.x-rt/0203-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch similarity index 92% rename from kernel/patches-5.4.x-rt/0283-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch rename to kernel/patches-5.11.x-rt/0203-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch index 5e1f43b68..e1ee8673d 100644 --- a/kernel/patches-5.4.x-rt/0283-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch +++ b/kernel/patches-5.11.x-rt/0203-drivers-block-zram-Replace-bit-spinlocks-with-rtmute.patch @@ -15,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c -@@ -55,6 +55,40 @@ static void zram_free_page(struct zram * +@@ -59,6 +59,40 @@ static void zram_free_page(struct zram * static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio); @@ -56,7 +56,7 @@ Signed-off-by: Sebastian Andrzej Siewior static int zram_slot_trylock(struct zram *zram, u32 index) { -@@ -70,6 +104,7 @@ static void zram_slot_unlock(struct zram +@@ -74,6 +108,7 @@ static void zram_slot_unlock(struct zram { bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); } @@ -64,7 +64,7 @@ Signed-off-by: Sebastian Andrzej Siewior static inline bool init_done(struct zram *zram) { -@@ -1154,6 +1189,7 @@ static bool zram_meta_alloc(struct zram +@@ -1165,6 +1200,7 @@ static bool zram_meta_alloc(struct zram if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); diff --git a/kernel/patches-5.4.x-rt/0286-tpm_tis-fix-stall-after-iowrite-s.patch b/kernel/patches-5.11.x-rt/0204-tpm_tis-fix-stall-after-iowrite-s.patch similarity index 90% rename from kernel/patches-5.4.x-rt/0286-tpm_tis-fix-stall-after-iowrite-s.patch rename to kernel/patches-5.11.x-rt/0204-tpm_tis-fix-stall-after-iowrite-s.patch index f2a7021cf..445d8ab7d 100644 --- a/kernel/patches-5.4.x-rt/0286-tpm_tis-fix-stall-after-iowrite-s.patch +++ b/kernel/patches-5.11.x-rt/0204-tpm_tis-fix-stall-after-iowrite-s.patch @@ -25,7 +25,7 @@ Signed-off-by: Sebastian Andrzej Siewior --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c -@@ -49,6 +49,31 @@ static inline struct tpm_tis_tcg_phy *to +@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to return container_of(data, struct tpm_tis_tcg_phy, priv); } @@ -54,10 +54,10 @@ Signed-off-by: Sebastian Andrzej Siewior + tpm_tis_flush(iobase); +} + - static bool interrupts = true; - module_param(interrupts, bool, 0444); + static int interrupts = -1; + module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -146,7 +171,7 @@ 
static int tpm_tcg_write_bytes(struct tp +@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tp struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); while (len--) @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior return 0; } -@@ -173,7 +198,7 @@ static int tpm_tcg_write32(struct tpm_ti +@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_ti { struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); diff --git a/kernel/patches-5.4.x-rt/0287-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch b/kernel/patches-5.11.x-rt/0205-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch similarity index 61% rename from kernel/patches-5.4.x-rt/0287-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch rename to kernel/patches-5.11.x-rt/0205-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch index aa866105c..51cf1a0cc 100644 --- a/kernel/patches-5.4.x-rt/0287-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch +++ b/kernel/patches-5.11.x-rt/0205-signals-allow-rt-tasks-to-cache-one-sigqueue-struct.patch @@ -1,34 +1,39 @@ From: Thomas Gleixner Date: Fri, 3 Jul 2009 08:44:56 -0500 -Subject: signals: Allow rt tasks to cache one sigqueue struct +Subject: signals: Allow RT tasks to cache one sigqueue struct -To avoid allocation allow rt tasks to cache one sigqueue struct in -task struct. +Allow realtime tasks to cache one sigqueue in task struct. This avoids an +allocation which can cause latencies or fail. +Ideally the sigqueue is cached after first sucessfull delivery and will be +available for next signal delivery. This works under the assumption that the RT +task has never an unprocessed singal while one is about to be queued. +The caching is not used for SIGQUEUE_PREALLOC because this kind of sigqueue is +handled differently (and not used for regular signal delivery). +[bigeasy: With a fix from Matt Fleming ] Signed-off-by: Thomas Gleixner - +Signed-off-by: Sebastian Andrzej Siewior --- - include/linux/sched.h | 2 + + include/linux/sched.h | 1 include/linux/signal.h | 1 kernel/exit.c | 2 - kernel/fork.c | 1 - kernel/signal.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++--- - 5 files changed, 70 insertions(+), 5 deletions(-) + kernel/signal.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++--- + 5 files changed, 67 insertions(+), 5 deletions(-) --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -935,6 +935,8 @@ struct task_struct { +@@ -985,6 +985,7 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; - struct sighand_struct *sighand; + struct sighand_struct __rcu *sighand; + struct sigqueue *sigqueue_cache; -+ sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ --- a/include/linux/signal.h +++ b/include/linux/signal.h -@@ -255,6 +255,7 @@ static inline void init_sigpending(struc +@@ -265,6 +265,7 @@ static inline void init_sigpending(struc } extern void flush_sigqueue(struct sigpending *queue); @@ -38,7 +43,7 @@ Signed-off-by: Thomas Gleixner static inline int valid_signal(unsigned long sig) --- a/kernel/exit.c +++ b/kernel/exit.c -@@ -161,7 +161,7 @@ static void __exit_signal(struct task_st +@@ -152,7 +152,7 @@ static void __exit_signal(struct task_st * Do this under ->siglock, we can race with another thread * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. 
*/ @@ -49,7 +54,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1924,6 +1924,7 @@ static __latent_entropy struct task_stru +@@ -2017,6 +2017,7 @@ static __latent_entropy struct task_stru spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); @@ -67,24 +72,24 @@ Signed-off-by: Thomas Gleixner #include #include #include -@@ -403,13 +404,30 @@ void task_join_group_stop(struct task_st - } +@@ -404,13 +405,30 @@ void task_join_group_stop(struct task_st + task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING); } -+static inline struct sigqueue *get_task_cache(struct task_struct *t) ++static struct sigqueue *sigqueue_from_cache(struct task_struct *t) +{ + struct sigqueue *q = t->sigqueue_cache; + -+ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) -+ return NULL; -+ return q; ++ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q) ++ return q; ++ return NULL; +} + -+static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) ++static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q) +{ -+ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) -+ return 0; -+ return 1; ++ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) ++ return true; ++ return false; +} + /* @@ -95,23 +100,23 @@ Signed-off-by: Thomas Gleixner static struct sigqueue * -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, -+ int override_rlimit, int fromslab) ++ int override_rlimit, bool fromslab) { struct sigqueue *q = NULL; struct user_struct *user; -@@ -431,7 +449,10 @@ static struct sigqueue * +@@ -432,7 +450,10 @@ static struct sigqueue * rcu_read_unlock(); if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { - q = kmem_cache_alloc(sigqueue_cachep, flags); + if (!fromslab) -+ q = get_task_cache(t); ++ q = sigqueue_from_cache(t); + if (!q) + q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } -@@ -448,6 +469,13 @@ static struct sigqueue * +@@ -449,6 +470,13 @@ static struct sigqueue * return q; } @@ -119,17 +124,17 @@ Signed-off-by: Thomas Gleixner +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, + int override_rlimit) +{ -+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); ++ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false); +} + static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) -@@ -457,6 +485,21 @@ static void __sigqueue_free(struct sigqu +@@ -458,6 +486,20 @@ static void __sigqueue_free(struct sigqu kmem_cache_free(sigqueue_cachep, q); } -+static void sigqueue_free_current(struct sigqueue *q) ++static void __sigqueue_cache_or_free(struct sigqueue *q) +{ + struct user_struct *up; + @@ -137,17 +142,16 @@ Signed-off-by: Thomas Gleixner + return; + + up = q->user; -+ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { -+ atomic_dec(&up->sigpending); ++ if (atomic_dec_and_test(&up->sigpending)) + free_uid(up); -+ } else -+ __sigqueue_free(q); ++ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q)) ++ kmem_cache_free(sigqueue_cachep, q); +} + void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; -@@ -470,6 +513,21 @@ void flush_sigqueue(struct sigpending *q +@@ -471,6 +513,21 @@ void flush_sigqueue(struct sigpending *q } /* @@ -160,7 +164,7 @@ Signed-off-by: Thomas Gleixner + + flush_sigqueue(&tsk->pending); + -+ q = get_task_cache(tsk); ++ q = 
sigqueue_from_cache(tsk); + if (q) + kmem_cache_free(sigqueue_cachep, q); +} @@ -169,16 +173,16 @@ Signed-off-by: Thomas Gleixner * Flush all pending signals for this kthread. */ void flush_signals(struct task_struct *t) -@@ -593,7 +651,7 @@ static void collect_signal(int sig, stru +@@ -594,7 +651,7 @@ static void collect_signal(int sig, stru (info->si_code == SI_TIMER) && (info->si_sys_private); - __sigqueue_free(first); -+ sigqueue_free_current(first); ++ __sigqueue_cache_or_free(first); } else { /* * Ok, it wasn't in the queue. This must be -@@ -630,6 +688,8 @@ int dequeue_signal(struct task_struct *t +@@ -631,6 +688,8 @@ int dequeue_signal(struct task_struct *t bool resched_timer = false; int signr; @@ -187,13 +191,12 @@ Signed-off-by: Thomas Gleixner /* We only dequeue private signals from ourselves, we don't let * signalfd steal them */ -@@ -1838,7 +1898,8 @@ EXPORT_SYMBOL(kill_pid); +@@ -1835,7 +1894,7 @@ EXPORT_SYMBOL(kill_pid); */ struct sigqueue *sigqueue_alloc(void) { - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); -+ /* Preallocated sigqueue objects always from the slabcache ! */ -+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); ++ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true); if (q) q->flags |= SIGQUEUE_PREALLOC; diff --git a/kernel/patches-5.4.x-rt/0288-genirq-disable-irqpoll-on-rt.patch b/kernel/patches-5.11.x-rt/0206-genirq-disable-irqpoll-on-rt.patch similarity index 89% rename from kernel/patches-5.4.x-rt/0288-genirq-disable-irqpoll-on-rt.patch rename to kernel/patches-5.11.x-rt/0206-genirq-disable-irqpoll-on-rt.patch index 74cd5e0a4..fd88efd9c 100644 --- a/kernel/patches-5.4.x-rt/0288-genirq-disable-irqpoll-on-rt.patch +++ b/kernel/patches-5.11.x-rt/0206-genirq-disable-irqpoll-on-rt.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Gleixner --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c -@@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir +@@ -443,6 +443,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir static int __init irqfixup_setup(char *str) { @@ -24,7 +24,7 @@ Signed-off-by: Thomas Gleixner irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); -@@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644); +@@ -455,6 +459,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { diff --git a/kernel/patches-5.4.x-rt/0289-sysfs-realtime-entry.patch b/kernel/patches-5.11.x-rt/0207-sysfs-realtime-entry.patch similarity index 100% rename from kernel/patches-5.4.x-rt/0289-sysfs-realtime-entry.patch rename to kernel/patches-5.11.x-rt/0207-sysfs-realtime-entry.patch diff --git a/kernel/patches-5.4.x-rt/0290-localversion.patch b/kernel/patches-5.11.x-rt/0208-localversion.patch similarity index 97% rename from kernel/patches-5.4.x-rt/0290-localversion.patch rename to kernel/patches-5.11.x-rt/0208-localversion.patch index 19d7ea050..58842b503 100644 --- a/kernel/patches-5.4.x-rt/0290-localversion.patch +++ b/kernel/patches-5.11.x-rt/0208-localversion.patch @@ -10,4 +10,4 @@ Signed-off-by: Thomas Gleixner --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt19 ++-rt11 diff --git a/kernel/patches-5.4.x-rt/0001-lib-smp_processor_id-Don-t-use-cpumask_equal.patch b/kernel/patches-5.4.x-rt/0001-lib-smp_processor_id-Don-t-use-cpumask_equal.patch deleted file mode 100644 index e0bbebc1c..000000000 --- a/kernel/patches-5.4.x-rt/0001-lib-smp_processor_id-Don-t-use-cpumask_equal.patch +++ 
/dev/null @@ -1,35 +0,0 @@ -From: Waiman Long -Date: Thu, 3 Oct 2019 16:36:08 -0400 -Subject: [PATCH] lib/smp_processor_id: Don't use cpumask_equal() - -The check_preemption_disabled() function uses cpumask_equal() to see -if the task is bounded to the current CPU only. cpumask_equal() calls -memcmp() to do the comparison. As x86 doesn't have __HAVE_ARCH_MEMCMP, -the slow memcmp() function in lib/string.c is used. - -On a RT kernel that call check_preemption_disabled() very frequently, -below is the perf-record output of a certain microbenchmark: - - 42.75% 2.45% testpmd [kernel.kallsyms] [k] check_preemption_disabled - 40.01% 39.97% testpmd [kernel.kallsyms] [k] memcmp - -We should avoid calling memcmp() in performance critical path. So the -cpumask_equal() call is now replaced with an equivalent simpler check. - -Signed-off-by: Waiman Long -Signed-off-by: Sebastian Andrzej Siewior ---- - lib/smp_processor_id.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/lib/smp_processor_id.c -+++ b/lib/smp_processor_id.c -@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(c - * Kernel threads bound to a single CPU can safely use - * smp_processor_id(): - */ -- if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) -+ if (current->nr_cpus_allowed == 1) - goto out; - - /* diff --git a/kernel/patches-5.4.x-rt/0002-0001-jbd2-Simplify-journal_unmap_buffer.patch b/kernel/patches-5.4.x-rt/0002-0001-jbd2-Simplify-journal_unmap_buffer.patch deleted file mode 100644 index b4c4a38bb..000000000 --- a/kernel/patches-5.4.x-rt/0002-0001-jbd2-Simplify-journal_unmap_buffer.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 9 Aug 2019 14:42:27 +0200 -Subject: [PATCH 1/7] jbd2: Simplify journal_unmap_buffer() - -journal_unmap_buffer() checks first whether the buffer head is a journal. -If so it takes locks and then invokes jbd2_journal_grab_journal_head() -followed by another check whether this is journal head buffer. - -The double checking is pointless. - -Replace the initial check with jbd2_journal_grab_journal_head() which -alredy checks whether the buffer head is actually a journal. - -Allows also early access to the journal head pointer for the upcoming -conversion of state lock to a regular spinlock. - -Signed-off-by: Thomas Gleixner -Reviewed-by: Jan Kara -Cc: linux-ext4@vger.kernel.org -Cc: "Theodore Ts'o" -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/transaction.c | 8 ++------ - 1 file changed, 2 insertions(+), 6 deletions(-) - ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -2203,7 +2203,8 @@ static int journal_unmap_buffer(journal_ - * holding the page lock. 
--sct - */ - -- if (!buffer_jbd(bh)) -+ jh = jbd2_journal_grab_journal_head(bh); -+ if (!jh) - goto zap_buffer_unlocked; - - /* OK, we have data buffer in journaled mode */ -@@ -2211,10 +2212,6 @@ static int journal_unmap_buffer(journal_ - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - -- jh = jbd2_journal_grab_journal_head(bh); -- if (!jh) -- goto zap_buffer_no_jh; -- - /* - * We cannot remove the buffer from checkpoint lists until the - * transaction adding inode to orphan list (let's call it T) -@@ -2338,7 +2335,6 @@ static int journal_unmap_buffer(journal_ - */ - jh->b_modified = 0; - jbd2_journal_put_journal_head(jh); --zap_buffer_no_jh: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - write_unlock(&journal->j_state_lock); diff --git a/kernel/patches-5.4.x-rt/0003-0002-jbd2-Remove-jbd_trylock_bh_state.patch b/kernel/patches-5.4.x-rt/0003-0002-jbd2-Remove-jbd_trylock_bh_state.patch deleted file mode 100644 index bce5d83a7..000000000 --- a/kernel/patches-5.4.x-rt/0003-0002-jbd2-Remove-jbd_trylock_bh_state.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 9 Aug 2019 14:42:28 +0200 -Subject: [PATCH 2/7] jbd2: Remove jbd_trylock_bh_state() - -No users. - -Signed-off-by: Thomas Gleixner -Reviewed-by: Jan Kara -Cc: linux-ext4@vger.kernel.org -Cc: "Theodore Ts'o" -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/jbd2.h | 5 ----- - 1 file changed, 5 deletions(-) - ---- a/include/linux/jbd2.h -+++ b/include/linux/jbd2.h -@@ -347,11 +347,6 @@ static inline void jbd_lock_bh_state(str - bit_spin_lock(BH_State, &bh->b_state); - } - --static inline int jbd_trylock_bh_state(struct buffer_head *bh) --{ -- return bit_spin_trylock(BH_State, &bh->b_state); --} -- - static inline int jbd_is_locked_bh_state(struct buffer_head *bh) - { - return bit_spin_is_locked(BH_State, &bh->b_state); diff --git a/kernel/patches-5.4.x-rt/0004-0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch b/kernel/patches-5.4.x-rt/0004-0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch deleted file mode 100644 index 6464bbb11..000000000 --- a/kernel/patches-5.4.x-rt/0004-0003-jbd2-Move-dropping-of-jh-reference-out-of-un-re-fili.patch +++ /dev/null @@ -1,150 +0,0 @@ -From: Jan Kara -Date: Fri, 9 Aug 2019 14:42:29 +0200 -Subject: [PATCH 3/7] jbd2: Move dropping of jh reference out of un/re-filing - functions - -__jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() drop -transaction's jh reference when they remove jh from a transaction. This -will be however inconvenient once we move state lock into journal_head -itself as we still need to unlock it and we'd need to grab jh reference -just for that. Move dropping of jh reference out of these functions into -the few callers. 
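In outline, the helper now reports whether the caller holds the final transaction reference, and the caller drops it only after releasing the state lock. A simplified sketch of that shape (names shortened from the hunks that follow):

static bool __refile_buffer(struct journal_head *jh)  /* called locked */
{
        if (jh->b_next_transaction == NULL) {
                __unfile_buffer(jh);
                return true;    /* caller must put the jh reference */
        }
        /* ... otherwise refile to b_next_transaction ... */
        return false;
}

static void refile_buffer(journal_t *journal, struct journal_head *jh)
{
        bool drop;

        jbd_lock_bh_state(jh2bh(jh));
        drop = __refile_buffer(jh);
        jbd_unlock_bh_state(jh2bh(jh));
        if (drop)
                jbd2_journal_put_journal_head(jh);
}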
- -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/commit.c | 5 ++++- - fs/jbd2/transaction.c | 23 +++++++++++++++-------- - include/linux/jbd2.h | 2 +- - 3 files changed, 20 insertions(+), 10 deletions(-) - ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -920,6 +920,7 @@ void jbd2_journal_commit_transaction(jou - transaction_t *cp_transaction; - struct buffer_head *bh; - int try_to_free = 0; -+ bool drop_ref; - - jh = commit_transaction->t_forget; - spin_unlock(&journal->j_list_lock); -@@ -1028,8 +1029,10 @@ void jbd2_journal_commit_transaction(jou - try_to_free = 1; - } - JBUFFER_TRACE(jh, "refile or unfile buffer"); -- __jbd2_journal_refile_buffer(jh); -+ drop_ref = __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); -+ if (drop_ref) -+ jbd2_journal_put_journal_head(jh); - if (try_to_free) - release_buffer_page(bh); /* Drops bh reference */ - else ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1602,6 +1602,7 @@ int jbd2_journal_forget (handle_t *handl - __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - } else { - __jbd2_journal_unfile_buffer(jh); -+ jbd2_journal_put_journal_head(jh); - if (!buffer_jbd(bh)) { - spin_unlock(&journal->j_list_lock); - goto not_jbd; -@@ -1975,17 +1976,15 @@ static void __jbd2_journal_temp_unlink_b - } - - /* -- * Remove buffer from all transactions. -+ * Remove buffer from all transactions. The caller is responsible for dropping -+ * the jh reference that belonged to the transaction. - * - * Called with bh_state lock and j_list_lock -- * -- * jh and bh may be already freed when this function returns. - */ - static void __jbd2_journal_unfile_buffer(struct journal_head *jh) - { - __jbd2_journal_temp_unlink_buffer(jh); - jh->b_transaction = NULL; -- jbd2_journal_put_journal_head(jh); - } - - void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) -@@ -1999,6 +1998,7 @@ void jbd2_journal_unfile_buffer(journal_ - __jbd2_journal_unfile_buffer(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); -+ jbd2_journal_put_journal_head(jh); - __brelse(bh); - } - -@@ -2137,6 +2137,7 @@ static int __dispose_buffer(struct journ - } else { - JBUFFER_TRACE(jh, "on running transaction"); - __jbd2_journal_unfile_buffer(jh); -+ jbd2_journal_put_journal_head(jh); - } - return may_free; - } -@@ -2502,9 +2503,11 @@ void jbd2_journal_file_buffer(struct jou - * Called under j_list_lock - * Called under jbd_lock_bh_state(jh2bh(jh)) - * -- * jh and bh may be already free when this function returns -+ * When this function returns true, there's no next transaction to refile to -+ * and the caller has to drop jh reference through -+ * jbd2_journal_put_journal_head(). - */ --void __jbd2_journal_refile_buffer(struct journal_head *jh) -+bool __jbd2_journal_refile_buffer(struct journal_head *jh) - { - int was_dirty, jlist; - struct buffer_head *bh = jh2bh(jh); -@@ -2516,7 +2519,7 @@ void __jbd2_journal_refile_buffer(struct - /* If the buffer is now unused, just drop it. 
*/ - if (jh->b_next_transaction == NULL) { - __jbd2_journal_unfile_buffer(jh); -- return; -+ return true; - } - - /* -@@ -2544,6 +2547,7 @@ void __jbd2_journal_refile_buffer(struct - - if (was_dirty) - set_buffer_jbddirty(bh); -+ return false; - } - - /* -@@ -2555,15 +2559,18 @@ void __jbd2_journal_refile_buffer(struct - void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) - { - struct buffer_head *bh = jh2bh(jh); -+ bool drop; - - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); -- __jbd2_journal_refile_buffer(jh); -+ drop = __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_list_lock); - __brelse(bh); -+ if (drop) -+ jbd2_journal_put_journal_head(jh); - } - - /* ---- a/include/linux/jbd2.h -+++ b/include/linux/jbd2.h -@@ -1252,7 +1252,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM - - /* Filing buffers */ - extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); --extern void __jbd2_journal_refile_buffer(struct journal_head *); -+extern bool __jbd2_journal_refile_buffer(struct journal_head *); - extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); - extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); - extern void __journal_free_buffer(struct journal_head *bh); diff --git a/kernel/patches-5.4.x-rt/0005-0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch b/kernel/patches-5.4.x-rt/0005-0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch deleted file mode 100644 index 07e46014b..000000000 --- a/kernel/patches-5.4.x-rt/0005-0004-jbd2-Drop-unnecessary-branch-from-jbd2_journal_forge.patch +++ /dev/null @@ -1,27 +0,0 @@ -From: Jan Kara -Date: Fri, 9 Aug 2019 14:42:30 +0200 -Subject: [PATCH 4/7] jbd2: Drop unnecessary branch from jbd2_journal_forget() - -We have cleared both dirty & jbddirty bits from the bh. So there's no -difference between bforget() and brelse(). Thus there's no point jumping -to no_jbd branch. - -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/transaction.c | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1603,10 +1603,6 @@ int jbd2_journal_forget (handle_t *handl - } else { - __jbd2_journal_unfile_buffer(jh); - jbd2_journal_put_journal_head(jh); -- if (!buffer_jbd(bh)) { -- spin_unlock(&journal->j_list_lock); -- goto not_jbd; -- } - } - spin_unlock(&journal->j_list_lock); - } else if (jh->b_transaction) { diff --git a/kernel/patches-5.4.x-rt/0006-0005-jbd2-Don-t-call-__bforget-unnecessarily.patch b/kernel/patches-5.4.x-rt/0006-0005-jbd2-Don-t-call-__bforget-unnecessarily.patch deleted file mode 100644 index 13b61b7f7..000000000 --- a/kernel/patches-5.4.x-rt/0006-0005-jbd2-Don-t-call-__bforget-unnecessarily.patch +++ /dev/null @@ -1,58 +0,0 @@ -From: Jan Kara -Date: Fri, 9 Aug 2019 14:42:31 +0200 -Subject: [PATCH 5/7] jbd2: Don't call __bforget() unnecessarily - -jbd2_journal_forget() jumps to 'not_jbd' branch which calls __bforget() -in cases where the buffer is clean which is pointless. In case of failed -assertion, it can be even argued that it is safer not to touch buffer's -dirty bits. Also logically it makes more sense to just jump to 'drop' -and that will make logic also simpler when we switch bh_state_lock to a -spinlock. 
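A sketch of the equivalence argued above (simplified: the real __bforget() additionally detaches the buffer from an associated mapping). With both dirty bits already clear, the clear is a no-op and only the release remains, which is exactly what the 'drop' path does:

static void bforget_on_clean_bh(struct buffer_head *bh)
{
        clear_buffer_dirty(bh); /* no-op here: the bit is already clear */
        __brelse(bh);           /* identical to the plain brelse() path */
}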
- -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/transaction.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -1554,7 +1554,7 @@ int jbd2_journal_forget (handle_t *handl - if (!J_EXPECT_JH(jh, !jh->b_committed_data, - "inconsistent data on disk")) { - err = -EIO; -- goto not_jbd; -+ goto drop; - } - - /* keep track of whether or not this transaction modified us */ -@@ -1644,7 +1644,7 @@ int jbd2_journal_forget (handle_t *handl - if (!jh->b_cp_transaction) { - JBUFFER_TRACE(jh, "belongs to none transaction"); - spin_unlock(&journal->j_list_lock); -- goto not_jbd; -+ goto drop; - } - - /* -@@ -1654,7 +1654,7 @@ int jbd2_journal_forget (handle_t *handl - if (!buffer_dirty(bh)) { - __jbd2_journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); -- goto not_jbd; -+ goto drop; - } - - /* -@@ -1667,10 +1667,9 @@ int jbd2_journal_forget (handle_t *handl - __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); - spin_unlock(&journal->j_list_lock); - } -- -+drop: - jbd_unlock_bh_state(bh); - __brelse(bh); --drop: - if (drop_reserve) { - /* no need to reserve log space for this block -bzzz */ - handle->h_buffer_credits++; diff --git a/kernel/patches-5.4.x-rt/0007-0006-jbd2-Make-state-lock-a-spinlock.patch b/kernel/patches-5.4.x-rt/0007-0006-jbd2-Make-state-lock-a-spinlock.patch deleted file mode 100644 index 41415b373..000000000 --- a/kernel/patches-5.4.x-rt/0007-0006-jbd2-Make-state-lock-a-spinlock.patch +++ /dev/null @@ -1,675 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 9 Aug 2019 14:42:32 +0200 -Subject: [PATCH 6/7] jbd2: Make state lock a spinlock - -Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep -on RT, e.g. spin_lock(), alloc/free(), are invoked inside the lock held -region because bit spinlocks disable preemption even on RT. - -A first attempt was to replace state lock with a spinlock placed in struct -buffer_head and make the locking conditional on PREEMPT_RT and -DEBUG_BIT_SPINLOCKS. - -Jan pointed out that there is a 4 byte hole in struct journal_head where a -regular spinlock fits in and he would not object to convert the state lock -to a spinlock unconditionally. - -Aside of solving the RT problem, this also gains lockdep coverage for the -journal head state lock (bit-spinlocks are not covered by lockdep as it's -hard to fit a lockdep map into a single bit). - -The trivial change would have been to convert the jbd_*lock_bh_state() -inlines, but that comes with the downside that these functions take a -buffer head pointer which needs to be converted to a journal head pointer -which adds another level of indirection. - -As almost all functions which use this lock have a journal head pointer -readily available, it makes more sense to remove the lock helper inlines -and write out spin_*lock() at all call sites. - -Fixup all locking comments as well. 
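In short, the conversion pattern is (sketch only; the diff below covers every
call site):

    /* before: bit-spinlock in bh->b_state, invisible to lockdep */
    jbd_lock_bh_state(bh);
    ...
    jbd_unlock_bh_state(bh);

    /* after: regular spinlock embedded in struct journal_head */
    spin_lock(&jh->b_state_lock);
    ...
    spin_unlock(&jh->b_state_lock);

Lock assertions change accordingly, from jbd_is_locked_bh_state(bh) to
lockdep_assert_held(&jh->b_state_lock), which is where the lockdep coverage
mentioned above comes from.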
- -Suggested-by: Jan Kara -Signed-off-by: Thomas Gleixner -Signed-off-by: Jan Kara -Cc: "Theodore Ts'o" -Cc: Mark Fasheh -Cc: Joseph Qi -Cc: Joel Becker -Cc: Jan Kara -Cc: linux-ext4@vger.kernel.org -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/commit.c | 8 +-- - fs/jbd2/journal.c | 10 ++-- - fs/jbd2/transaction.c | 100 ++++++++++++++++++++----------------------- - fs/ocfs2/suballoc.c | 19 ++++---- - include/linux/jbd2.h | 20 -------- - include/linux/journal-head.h | 21 ++++++--- - 6 files changed, 84 insertions(+), 94 deletions(-) - ---- a/fs/jbd2/commit.c -+++ b/fs/jbd2/commit.c -@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(jou - if (jh->b_committed_data) { - struct buffer_head *bh = jh2bh(jh); - -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - jbd2_free(jh->b_committed_data, bh->b_size); - jh->b_committed_data = NULL; -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - } - jbd2_journal_refile_buffer(journal, jh); - } -@@ -930,7 +930,7 @@ void jbd2_journal_commit_transaction(jou - * done with it. - */ - get_bh(bh); -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); - - /* -@@ -1030,7 +1030,7 @@ void jbd2_journal_commit_transaction(jou - } - JBUFFER_TRACE(jh, "refile or unfile buffer"); - drop_ref = __jbd2_journal_refile_buffer(jh); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - if (drop_ref) - jbd2_journal_put_journal_head(jh); - if (try_to_free) ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(t - /* keep subsequent assertions sane */ - atomic_set(&new_bh->b_count, 1); - -- jbd_lock_bh_state(bh_in); -+ spin_lock(&jh_in->b_state_lock); - repeat: - /* - * If a new transaction has already done a buffer copy-out, then -@@ -405,13 +405,13 @@ int jbd2_journal_write_metadata_buffer(t - if (need_copy_out && !done_copy_out) { - char *tmp; - -- jbd_unlock_bh_state(bh_in); -+ spin_unlock(&jh_in->b_state_lock); - tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); - if (!tmp) { - brelse(new_bh); - return -ENOMEM; - } -- jbd_lock_bh_state(bh_in); -+ spin_lock(&jh_in->b_state_lock); - if (jh_in->b_frozen_data) { - jbd2_free(tmp, bh_in->b_size); - goto repeat; -@@ -464,7 +464,7 @@ int jbd2_journal_write_metadata_buffer(t - __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); - spin_unlock(&journal->j_list_lock); - set_buffer_shadow(bh_in); -- jbd_unlock_bh_state(bh_in); -+ spin_unlock(&jh_in->b_state_lock); - - return do_escape | (done_copy_out << 1); - } -@@ -2407,6 +2407,8 @@ static struct journal_head *journal_allo - ret = kmem_cache_zalloc(jbd2_journal_head_cache, - GFP_NOFS | __GFP_NOFAIL); - } -+ if (ret) -+ spin_lock_init(&ret->b_state_lock); - return ret; - } - ---- a/fs/jbd2/transaction.c -+++ b/fs/jbd2/transaction.c -@@ -877,7 +877,7 @@ do_get_write_access(handle_t *handle, st - - start_lock = jiffies; - lock_buffer(bh); -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - - /* If it takes too long to lock the buffer, trace it */ - time_lock = jbd2_time_diff(start_lock, jiffies); -@@ -927,7 +927,7 @@ do_get_write_access(handle_t *handle, st - - error = -EROFS; - if (is_handle_aborted(handle)) { -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - goto out; - } - error = 0; -@@ -991,7 +991,7 @@ do_get_write_access(handle_t *handle, st - */ - if (buffer_shadow(bh)) { - JBUFFER_TRACE(jh, "on shadow: sleep"); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - 
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); - goto repeat; - } -@@ -1012,7 +1012,7 @@ do_get_write_access(handle_t *handle, st - JBUFFER_TRACE(jh, "generate frozen data"); - if (!frozen_buffer) { - JBUFFER_TRACE(jh, "allocate memory for buffer"); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, - GFP_NOFS | __GFP_NOFAIL); - goto repeat; -@@ -1031,7 +1031,7 @@ do_get_write_access(handle_t *handle, st - jh->b_next_transaction = transaction; - - done: -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - - /* - * If we are about to journal a buffer, then any revoke pending on it is -@@ -1173,7 +1173,7 @@ int jbd2_journal_get_create_access(handl - * that case: the transaction must have deleted the buffer for it to be - * reused here. - */ -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - J_ASSERT_JH(jh, (jh->b_transaction == transaction || - jh->b_transaction == NULL || - (jh->b_transaction == journal->j_committing_transaction && -@@ -1208,7 +1208,7 @@ int jbd2_journal_get_create_access(handl - jh->b_next_transaction = transaction; - spin_unlock(&journal->j_list_lock); - } -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - - /* - * akpm: I added this. ext3_alloc_branch can pick up new indirect -@@ -1279,13 +1279,13 @@ int jbd2_journal_get_undo_access(handle_ - committed_data = jbd2_alloc(jh2bh(jh)->b_size, - GFP_NOFS|__GFP_NOFAIL); - -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - if (!jh->b_committed_data) { - /* Copy out the current buffer contents into the - * preserved, committed copy. */ - JBUFFER_TRACE(jh, "generate b_committed data"); - if (!committed_data) { -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - goto repeat; - } - -@@ -1293,7 +1293,7 @@ int jbd2_journal_get_undo_access(handle_ - committed_data = NULL; - memcpy(jh->b_committed_data, bh->b_data, bh->b_size); - } -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - out: - jbd2_journal_put_journal_head(jh); - if (unlikely(committed_data)) -@@ -1394,16 +1394,16 @@ int jbd2_journal_dirty_metadata(handle_t - */ - if (jh->b_transaction != transaction && - jh->b_next_transaction != transaction) { -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - J_ASSERT_JH(jh, jh->b_transaction == transaction || - jh->b_next_transaction == transaction); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - } - if (jh->b_modified == 1) { - /* If it's in our transaction it must be in BJ_Metadata list. 
*/ - if (jh->b_transaction == transaction && - jh->b_jlist != BJ_Metadata) { -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - if (jh->b_transaction == transaction && - jh->b_jlist != BJ_Metadata) - pr_err("JBD2: assertion failure: h_type=%u " -@@ -1413,13 +1413,13 @@ int jbd2_journal_dirty_metadata(handle_t - jh->b_jlist); - J_ASSERT_JH(jh, jh->b_transaction != transaction || - jh->b_jlist == BJ_Metadata); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - } - goto out; - } - - journal = transaction->t_journal; -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - - if (jh->b_modified == 0) { - /* -@@ -1505,7 +1505,7 @@ int jbd2_journal_dirty_metadata(handle_t - __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); - spin_unlock(&journal->j_list_lock); - out_unlock_bh: -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - out: - JBUFFER_TRACE(jh, "exit"); - return ret; -@@ -1543,11 +1543,13 @@ int jbd2_journal_forget (handle_t *handl - - BUFFER_TRACE(bh, "entry"); - -- jbd_lock_bh_state(bh); -+ jh = jbd2_journal_grab_journal_head(bh); -+ if (!jh) { -+ __bforget(bh); -+ return 0; -+ } - -- if (!buffer_jbd(bh)) -- goto not_jbd; -- jh = bh2jh(bh); -+ spin_lock(&jh->b_state_lock); - - /* Critical error: attempting to delete a bitmap buffer, maybe? - * Don't do any jbd operations, and return an error. */ -@@ -1668,18 +1670,14 @@ int jbd2_journal_forget (handle_t *handl - spin_unlock(&journal->j_list_lock); - } - drop: -- jbd_unlock_bh_state(bh); - __brelse(bh); -+ spin_unlock(&jh->b_state_lock); -+ jbd2_journal_put_journal_head(jh); - if (drop_reserve) { - /* no need to reserve log space for this block -bzzz */ - handle->h_buffer_credits++; - } - return err; -- --not_jbd: -- jbd_unlock_bh_state(bh); -- __bforget(bh); -- goto drop; - } - - /** -@@ -1878,7 +1876,7 @@ int jbd2_journal_stop(handle_t *handle) - * - * j_list_lock is held. - * -- * jbd_lock_bh_state(jh2bh(jh)) is held. -+ * jh->b_state_lock is held. - */ - - static inline void -@@ -1902,7 +1900,7 @@ static inline void - * - * Called with j_list_lock held, and the journal may not be locked. - * -- * jbd_lock_bh_state(jh2bh(jh)) is held. -+ * jh->b_state_lock is held. - */ - - static inline void -@@ -1934,7 +1932,7 @@ static void __jbd2_journal_temp_unlink_b - transaction_t *transaction; - struct buffer_head *bh = jh2bh(jh); - -- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); -+ lockdep_assert_held(&jh->b_state_lock); - transaction = jh->b_transaction; - if (transaction) - assert_spin_locked(&transaction->t_journal->j_list_lock); -@@ -1988,11 +1986,11 @@ void jbd2_journal_unfile_buffer(journal_ - - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - spin_lock(&journal->j_list_lock); - __jbd2_journal_unfile_buffer(jh); - spin_unlock(&journal->j_list_lock); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - jbd2_journal_put_journal_head(jh); - __brelse(bh); - } -@@ -2000,7 +1998,7 @@ void jbd2_journal_unfile_buffer(journal_ - /* - * Called from jbd2_journal_try_to_free_buffers(). 
- * -- * Called under jbd_lock_bh_state(bh) -+ * Called under jh->b_state_lock - */ - static void - __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) -@@ -2087,10 +2085,10 @@ int jbd2_journal_try_to_free_buffers(jou - if (!jh) - continue; - -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - __journal_try_to_free_buffer(journal, bh); -+ spin_unlock(&jh->b_state_lock); - jbd2_journal_put_journal_head(jh); -- jbd_unlock_bh_state(bh); - if (buffer_jbd(bh)) - goto busy; - } while ((bh = bh->b_this_page) != head); -@@ -2111,7 +2109,7 @@ int jbd2_journal_try_to_free_buffers(jou - * - * Called under j_list_lock. - * -- * Called under jbd_lock_bh_state(bh). -+ * Called under jh->b_state_lock. - */ - static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) - { -@@ -2205,7 +2203,7 @@ static int journal_unmap_buffer(journal_ - - /* OK, we have data buffer in journaled mode */ - write_lock(&journal->j_state_lock); -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - spin_lock(&journal->j_list_lock); - - /* -@@ -2286,10 +2284,10 @@ static int journal_unmap_buffer(journal_ - * for commit and try again. - */ - if (partial_page) { -- jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - write_unlock(&journal->j_state_lock); -+ jbd2_journal_put_journal_head(jh); - return -EBUSY; - } - /* -@@ -2303,10 +2301,10 @@ static int journal_unmap_buffer(journal_ - if (journal->j_running_transaction && buffer_jbddirty(bh)) - jh->b_next_transaction = journal->j_running_transaction; - jh->b_modified = 0; -- jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - write_unlock(&journal->j_state_lock); -+ jbd2_journal_put_journal_head(jh); - return 0; - } else { - /* Good, the buffer belongs to the running transaction. -@@ -2330,10 +2328,10 @@ static int journal_unmap_buffer(journal_ - * here. - */ - jh->b_modified = 0; -- jbd2_journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - write_unlock(&journal->j_state_lock); -+ jbd2_journal_put_journal_head(jh); - zap_buffer_unlocked: - clear_buffer_dirty(bh); - J_ASSERT_BH(bh, !buffer_jbddirty(bh)); -@@ -2420,7 +2418,7 @@ void __jbd2_journal_file_buffer(struct j - int was_dirty = 0; - struct buffer_head *bh = jh2bh(jh); - -- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); -+ lockdep_assert_held(&jh->b_state_lock); - assert_spin_locked(&transaction->t_journal->j_list_lock); - - J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); -@@ -2482,11 +2480,11 @@ void __jbd2_journal_file_buffer(struct j - void jbd2_journal_file_buffer(struct journal_head *jh, - transaction_t *transaction, int jlist) - { -- jbd_lock_bh_state(jh2bh(jh)); -+ spin_lock(&jh->b_state_lock); - spin_lock(&transaction->t_journal->j_list_lock); - __jbd2_journal_file_buffer(jh, transaction, jlist); - spin_unlock(&transaction->t_journal->j_list_lock); -- jbd_unlock_bh_state(jh2bh(jh)); -+ spin_unlock(&jh->b_state_lock); - } - - /* -@@ -2496,7 +2494,7 @@ void jbd2_journal_file_buffer(struct jou - * buffer on that transaction's metadata list. 
- * - * Called under j_list_lock -- * Called under jbd_lock_bh_state(jh2bh(jh)) -+ * Called under jh->b_state_lock - * - * When this function returns true, there's no next transaction to refile to - * and the caller has to drop jh reference through -@@ -2507,7 +2505,7 @@ bool __jbd2_journal_refile_buffer(struct - int was_dirty, jlist; - struct buffer_head *bh = jh2bh(jh); - -- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); -+ lockdep_assert_held(&jh->b_state_lock); - if (jh->b_transaction) - assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); - -@@ -2553,17 +2551,13 @@ bool __jbd2_journal_refile_buffer(struct - */ - void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) - { -- struct buffer_head *bh = jh2bh(jh); - bool drop; - -- /* Get reference so that buffer cannot be freed before we unlock it */ -- get_bh(bh); -- jbd_lock_bh_state(bh); -+ spin_lock(&jh->b_state_lock); - spin_lock(&journal->j_list_lock); - drop = __jbd2_journal_refile_buffer(jh); -- jbd_unlock_bh_state(bh); -+ spin_unlock(&jh->b_state_lock); - spin_unlock(&journal->j_list_lock); -- __brelse(bh); - if (drop) - jbd2_journal_put_journal_head(jh); - } ---- a/fs/ocfs2/suballoc.c -+++ b/fs/ocfs2/suballoc.c -@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable - int nr) - { - struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; -+ struct journal_head *jh; - int ret; - - if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) -@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable - if (!buffer_jbd(bg_bh)) - return 1; - -- jbd_lock_bh_state(bg_bh); -- bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; -+ jh = bh2jh(bg_bh); -+ spin_lock(&jh->b_state_lock); -+ bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; -- jbd_unlock_bh_state(bg_bh); -+ spin_unlock(&jh->b_state_lock); - - return ret; - } -@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits( - int status; - unsigned int tmp; - struct ocfs2_group_desc *undo_bg = NULL; -+ struct journal_head *jh; - - /* The caller got this descriptor from - * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ -@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits( - goto bail; - } - -+ jh = bh2jh(group_bh); - if (undo_fn) { -- jbd_lock_bh_state(group_bh); -- undo_bg = (struct ocfs2_group_desc *) -- bh2jh(group_bh)->b_committed_data; -+ spin_lock(&jh->b_state_lock); -+ undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data; - BUG_ON(!undo_bg); - } - -@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits( - le16_add_cpu(&bg->bg_free_bits_count, num_bits); - if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { - if (undo_fn) -- jbd_unlock_bh_state(group_bh); -+ spin_unlock(&jh->b_state_lock); - return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. 
num_bits %d\n", - (unsigned long long)le64_to_cpu(bg->bg_blkno), - le16_to_cpu(bg->bg_bits), -@@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits( - } - - if (undo_fn) -- jbd_unlock_bh_state(group_bh); -+ spin_unlock(&jh->b_state_lock); - - ocfs2_journal_dirty(handle, group_bh); - bail: ---- a/include/linux/jbd2.h -+++ b/include/linux/jbd2.h -@@ -313,7 +313,6 @@ enum jbd_state_bits { - BH_Revoked, /* Has been revoked from the log */ - BH_RevokeValid, /* Revoked flag is valid */ - BH_JBDDirty, /* Is dirty but journaled */ -- BH_State, /* Pins most journal_head state */ - BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ - BH_Shadow, /* IO on shadow buffer is running */ - BH_Verified, /* Metadata block has been verified ok */ -@@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh - return bh->b_private; - } - --static inline void jbd_lock_bh_state(struct buffer_head *bh) --{ -- bit_spin_lock(BH_State, &bh->b_state); --} -- --static inline int jbd_is_locked_bh_state(struct buffer_head *bh) --{ -- return bit_spin_is_locked(BH_State, &bh->b_state); --} -- --static inline void jbd_unlock_bh_state(struct buffer_head *bh) --{ -- bit_spin_unlock(BH_State, &bh->b_state); --} -- - static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) - { - bit_spin_lock(BH_JournalHead, &bh->b_state); -@@ -551,9 +535,9 @@ struct transaction_chp_stats_s { - * ->jbd_lock_bh_journal_head() (This is "innermost") - * - * j_state_lock -- * ->jbd_lock_bh_state() -+ * ->b_state_lock - * -- * jbd_lock_bh_state() -+ * b_state_lock - * ->j_list_lock - * - * j_state_lock ---- a/include/linux/journal-head.h -+++ b/include/linux/journal-head.h -@@ -11,6 +11,8 @@ - #ifndef JOURNAL_HEAD_H_INCLUDED - #define JOURNAL_HEAD_H_INCLUDED - -+#include -+ - typedef unsigned int tid_t; /* Unique transaction ID */ - typedef struct transaction_s transaction_t; /* Compound transaction type */ - -@@ -24,13 +26,18 @@ struct journal_head { - struct buffer_head *b_bh; - - /* -+ * Protect the buffer head state -+ */ -+ spinlock_t b_state_lock; -+ -+ /* - * Reference count - see description in journal.c - * [jbd_lock_bh_journal_head()] - */ - int b_jcount; - - /* -- * Journalling list for this buffer [jbd_lock_bh_state()] -+ * Journalling list for this buffer [b_state_lock] - * NOTE: We *cannot* combine this with b_modified into a bitfield - * as gcc would then (which the C standard allows but which is - * very unuseful) make 64-bit accesses to the bitfield and clobber -@@ -41,20 +48,20 @@ struct journal_head { - /* - * This flag signals the buffer has been modified by - * the currently running transaction -- * [jbd_lock_bh_state()] -+ * [b_state_lock] - */ - unsigned b_modified; - - /* - * Copy of the buffer data frozen for writing to the log. -- * [jbd_lock_bh_state()] -+ * [b_state_lock] - */ - char *b_frozen_data; - - /* - * Pointer to a saved copy of the buffer containing no uncommitted - * deallocation references, so that allocations can avoid overwriting -- * uncommitted deletes. [jbd_lock_bh_state()] -+ * uncommitted deletes. [b_state_lock] - */ - char *b_committed_data; - -@@ -63,7 +70,7 @@ struct journal_head { - * metadata: either the running transaction or the committing - * transaction (if there is one). Only applies to buffers on a - * transaction's data or metadata journaling list. -- * [j_list_lock] [jbd_lock_bh_state()] -+ * [j_list_lock] [b_state_lock] - * Either of these locks is enough for reading, both are needed for - * changes. 
- */ -@@ -73,13 +80,13 @@ struct journal_head { - * Pointer to the running compound transaction which is currently - * modifying the buffer's metadata, if there was already a transaction - * committing it when the new transaction touched it. -- * [t_list_lock] [jbd_lock_bh_state()] -+ * [t_list_lock] [b_state_lock] - */ - transaction_t *b_next_transaction; - - /* - * Doubly-linked list of buffers on a transaction's data, metadata or -- * forget queue. [t_list_lock] [jbd_lock_bh_state()] -+ * forget queue. [t_list_lock] [b_state_lock] - */ - struct journal_head *b_tnext, *b_tprev; - diff --git a/kernel/patches-5.4.x-rt/0008-0007-jbd2-Free-journal-head-outside-of-locked-region.patch b/kernel/patches-5.4.x-rt/0008-0007-jbd2-Free-journal-head-outside-of-locked-region.patch deleted file mode 100644 index e58eaf8c6..000000000 --- a/kernel/patches-5.4.x-rt/0008-0007-jbd2-Free-journal-head-outside-of-locked-region.patch +++ /dev/null @@ -1,88 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 9 Aug 2019 14:42:33 +0200 -Subject: [PATCH 7/7] jbd2: Free journal head outside of locked region - -On PREEMPT_RT bit-spinlocks have the same semantics as on PREEMPT_RT=n, -i.e. they disable preemption. That means functions which are not safe to be -called in preempt disabled context on RT trigger a might_sleep() assert. - -The journal head bit spinlock is mostly held for short code sequences with -trivial RT safe functionality, except for one place: - -jbd2_journal_put_journal_head() invokes __journal_remove_journal_head() -with the journal head bit spinlock held. __journal_remove_journal_head() -invokes kmem_cache_free() which must not be called with preemption disabled -on RT. - -Jan suggested to rework the removal function so the actual free happens -outside the bit-spinlocked region. - -Split it into two parts: - - - Do the sanity checks and the buffer head detach under the lock - - - Do the actual free after dropping the lock - -There is error case handling in the free part which needs to dereference -the b_size field of the now detached buffer head. Due to paranoia (caused -by ignorance) the size is retrieved in the detach function and handed into -the free function. Might be over-engineered, but better safe than sorry. - -This makes the journal head bit-spinlock usage RT compliant and also avoids -nested locking which is not covered by lockdep. 
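The resulting shape of jbd2_journal_put_journal_head() is roughly (sketch; the
exact code is in the diff below):

    jbd_lock_bh_journal_head(bh);
    --jh->b_jcount;
    if (!jh->b_jcount) {
            __journal_remove_journal_head(bh);  /* detach under the bit lock */
            jbd_unlock_bh_journal_head(bh);
            /* free with the lock dropped; b_size handed in from the bh */
            journal_release_journal_head(jh, bh->b_size);
            __brelse(bh);
    } else {
            jbd_unlock_bh_journal_head(bh);
    }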
- -Suggested-by: Jan Kara -Signed-off-by: Thomas Gleixner -Cc: linux-ext4@vger.kernel.org -Cc: "Theodore Ts'o" -Cc: Jan Kara -Signed-off-by: Jan Kara -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/jbd2/journal.c | 20 ++++++++++++++------ - 1 file changed, 14 insertions(+), 6 deletions(-) - ---- a/fs/jbd2/journal.c -+++ b/fs/jbd2/journal.c -@@ -2528,17 +2528,23 @@ static void __journal_remove_journal_hea - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); -+ -+ /* Unlink before dropping the lock */ -+ bh->b_private = NULL; -+ jh->b_bh = NULL; /* debug, really */ -+ clear_buffer_jbd(bh); -+} -+ -+static void journal_release_journal_head(struct journal_head *jh, size_t b_size) -+{ - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); -- jbd2_free(jh->b_frozen_data, bh->b_size); -+ jbd2_free(jh->b_frozen_data, b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); -- jbd2_free(jh->b_committed_data, bh->b_size); -+ jbd2_free(jh->b_committed_data, b_size); - } -- bh->b_private = NULL; -- jh->b_bh = NULL; /* debug, really */ -- clear_buffer_jbd(bh); - journal_free_journal_head(jh); - } - -@@ -2556,9 +2562,11 @@ void jbd2_journal_put_journal_head(struc - if (!jh->b_jcount) { - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -+ journal_release_journal_head(jh, bh->b_size); - __brelse(bh); -- } else -+ } else { - jbd_unlock_bh_journal_head(bh); -+ } - } - - /* diff --git a/kernel/patches-5.4.x-rt/0009-x86-ioapic-Rename-misnamed-functions.patch b/kernel/patches-5.4.x-rt/0009-x86-ioapic-Rename-misnamed-functions.patch deleted file mode 100644 index 547e5a889..000000000 --- a/kernel/patches-5.4.x-rt/0009-x86-ioapic-Rename-misnamed-functions.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: Thomas Gleixner -Date: Thu, 17 Oct 2019 12:19:02 +0200 -Subject: [PATCH] x86/ioapic: Rename misnamed functions - -ioapic_irqd_[un]mask() are misnomers as both functions do way more than -masking and unmasking the interrupt line. Both deal with the moving the -affinity of the interrupt within interrupt context. The mask/unmask is just -a tiny part of the functionality. - -Rename them to ioapic_prepare/finish_move(), fixup the call sites and -rename the related variables in the code to reflect what this is about. - -No functional change. - -Signed-off-by: Thomas Gleixner -Cc: Andy Shevchenko -Cc: Linus Torvalds -Cc: Peter Zijlstra -Cc: Sebastian Siewior -Link: https://lkml.kernel.org/r/20191017101938.412489856@linutronix.de -Signed-off-by: Ingo Molnar -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/x86/kernel/apic/io_apic.c | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - ---- a/arch/x86/kernel/apic/io_apic.c -+++ b/arch/x86/kernel/apic/io_apic.c -@@ -1725,7 +1725,7 @@ static bool io_apic_level_ack_pending(st - return false; - } - --static inline bool ioapic_irqd_mask(struct irq_data *data) -+static inline bool ioapic_prepare_move(struct irq_data *data) - { - /* If we are moving the IRQ we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { -@@ -1736,9 +1736,9 @@ static inline bool ioapic_irqd_mask(stru - return false; - } - --static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) -+static inline void ioapic_finish_move(struct irq_data *data, bool moveit) - { -- if (unlikely(masked)) { -+ if (unlikely(moveit)) { - /* Only migrate the irq if the ack has been received. 
- * - * On rare occasions the broadcast level triggered ack gets -@@ -1773,11 +1773,11 @@ static inline void ioapic_irqd_unmask(st - } - } - #else --static inline bool ioapic_irqd_mask(struct irq_data *data) -+static inline bool ioapic_prepare_move(struct irq_data *data) - { - return false; - } --static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) -+static inline void ioapic_finish_move(struct irq_data *data, bool moveit) - { - } - #endif -@@ -1786,11 +1786,11 @@ static void ioapic_ack_level(struct irq_ - { - struct irq_cfg *cfg = irqd_cfg(irq_data); - unsigned long v; -- bool masked; -+ bool moveit; - int i; - - irq_complete_move(cfg); -- masked = ioapic_irqd_mask(irq_data); -+ moveit = ioapic_prepare_move(irq_data); - - /* - * It appears there is an erratum which affects at least version 0x11 -@@ -1845,7 +1845,7 @@ static void ioapic_ack_level(struct irq_ - eoi_ioapic_pin(cfg->vector, irq_data->chip_data); - } - -- ioapic_irqd_unmask(irq_data, masked); -+ ioapic_finish_move(irq_data, moveit); - } - - static void ioapic_ir_ack_level(struct irq_data *irq_data) diff --git a/kernel/patches-5.4.x-rt/0010-percpu-refcount-use-normal-instead-of-RCU-sched.patch b/kernel/patches-5.4.x-rt/0010-percpu-refcount-use-normal-instead-of-RCU-sched.patch deleted file mode 100644 index 03f68eae8..000000000 --- a/kernel/patches-5.4.x-rt/0010-percpu-refcount-use-normal-instead-of-RCU-sched.patch +++ /dev/null @@ -1,100 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 4 Sep 2019 17:59:36 +0200 -Subject: [PATCH] percpu-refcount: use normal instead of RCU-sched" -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This is a revert of commit - a4244454df129 ("percpu-refcount: use RCU-sched insted of normal RCU") - -which claims the only reason for using RCU-sched is - "rcu_read_[un]lock() … are slightly more expensive than preempt_disable/enable()" - -and - "As the RCU critical sections are extremely short, using sched-RCU - shouldn't have any latency implications." - -The problem with RCU-sched is that it disables preemption and the -callback must not acquire any sleeping locks like spinlock_t on -PREEMPT_RT which is the case. - -Convert back to normal RCU. 
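The change is mechanical and every helper follows the same pattern; a sketch of
percpu_ref_get_many() matching the diff below:

    rcu_read_lock();                        /* was: rcu_read_lock_sched() */
    if (__ref_is_percpu(ref, &percpu_count))
            this_cpu_add(*percpu_count, nr);
    else
            atomic_long_add(nr, &ref->count);
    rcu_read_unlock();                      /* was: rcu_read_unlock_sched() */

With normal RCU the read side stays preemptible, so on PREEMPT_RT the release
callback may safely acquire sleeping locks such as spinlock_t.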
- -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-refcount.h | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - ---- a/include/linux/percpu-refcount.h -+++ b/include/linux/percpu-refcount.h -@@ -186,14 +186,14 @@ static inline void percpu_ref_get_many(s - { - unsigned long __percpu *percpu_count; - -- rcu_read_lock_sched(); -+ rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) - this_cpu_add(*percpu_count, nr); - else - atomic_long_add(nr, &ref->count); - -- rcu_read_unlock_sched(); -+ rcu_read_unlock(); - } - - /** -@@ -223,7 +223,7 @@ static inline bool percpu_ref_tryget(str - unsigned long __percpu *percpu_count; - bool ret; - -- rcu_read_lock_sched(); -+ rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) { - this_cpu_inc(*percpu_count); -@@ -232,7 +232,7 @@ static inline bool percpu_ref_tryget(str - ret = atomic_long_inc_not_zero(&ref->count); - } - -- rcu_read_unlock_sched(); -+ rcu_read_unlock(); - - return ret; - } -@@ -257,7 +257,7 @@ static inline bool percpu_ref_tryget_liv - unsigned long __percpu *percpu_count; - bool ret = false; - -- rcu_read_lock_sched(); -+ rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) { - this_cpu_inc(*percpu_count); -@@ -266,7 +266,7 @@ static inline bool percpu_ref_tryget_liv - ret = atomic_long_inc_not_zero(&ref->count); - } - -- rcu_read_unlock_sched(); -+ rcu_read_unlock(); - - return ret; - } -@@ -285,14 +285,14 @@ static inline void percpu_ref_put_many(s - { - unsigned long __percpu *percpu_count; - -- rcu_read_lock_sched(); -+ rcu_read_lock(); - - if (__ref_is_percpu(ref, &percpu_count)) - this_cpu_sub(*percpu_count, nr); - else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) - ref->release(ref); - -- rcu_read_unlock_sched(); -+ rcu_read_unlock(); - } - - /** diff --git a/kernel/patches-5.4.x-rt/0011-drm-i915-Don-t-disable-interrupts-independently-of-t.patch b/kernel/patches-5.4.x-rt/0011-drm-i915-Don-t-disable-interrupts-independently-of-t.patch deleted file mode 100644 index 3831669a5..000000000 --- a/kernel/patches-5.4.x-rt/0011-drm-i915-Don-t-disable-interrupts-independently-of-t.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Wed, 10 Apr 2019 11:01:37 +0200 -Subject: [PATCH] drm/i915: Don't disable interrupts independently of the - lock - -The locks (active.lock and rq->lock) need to be taken with disabled -interrupts. This is done in i915_request_retire() by disabling the -interrupts independently of the locks itself. -While local_irq_disable()+spin_lock() equals spin_lock_irq() on vanilla -it does not on PREEMPT_RT. -Chris Wilson confirmed that local_irq_disable() was just introduced as -an optimisation to avoid enabling/disabling interrupts during -lock/unlock combo. - -Enable/disable interrupts as part of the locking instruction. - -Cc: Chris Wilson -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/gpu/drm/i915/i915_request.c | 12 ++++-------- - 1 file changed, 4 insertions(+), 8 deletions(-) - ---- a/drivers/gpu/drm/i915/i915_request.c -+++ b/drivers/gpu/drm/i915/i915_request.c -@@ -205,14 +205,14 @@ static void remove_from_engine(struct i9 - * check that the rq still belongs to the newly locked engine. 
- */ - locked = READ_ONCE(rq->engine); -- spin_lock(&locked->active.lock); -+ spin_lock_irq(&locked->active.lock); - while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) { - spin_unlock(&locked->active.lock); - spin_lock(&engine->active.lock); - locked = engine; - } - list_del(&rq->sched.link); -- spin_unlock(&locked->active.lock); -+ spin_unlock_irq(&locked->active.lock); - } - - static bool i915_request_retire(struct i915_request *rq) -@@ -272,8 +272,6 @@ static bool i915_request_retire(struct i - active->retire(active, rq); - } - -- local_irq_disable(); -- - /* - * We only loosely track inflight requests across preemption, - * and so we may find ourselves attempting to retire a _completed_ -@@ -282,7 +280,7 @@ static bool i915_request_retire(struct i - */ - remove_from_engine(rq); - -- spin_lock(&rq->lock); -+ spin_lock_irq(&rq->lock); - i915_request_mark_complete(rq); - if (!i915_request_signaled(rq)) - dma_fence_signal_locked(&rq->fence); -@@ -297,9 +295,7 @@ static bool i915_request_retire(struct i - __notify_execute_cb(rq); - } - GEM_BUG_ON(!list_empty(&rq->execute_cb)); -- spin_unlock(&rq->lock); -- -- local_irq_enable(); -+ spin_unlock_irq(&rq->lock); - - remove_from_client(rq); - list_del(&rq->link); diff --git a/kernel/patches-5.4.x-rt/0012-block-Don-t-disable-interrupts-in-trigger_softirq.patch b/kernel/patches-5.4.x-rt/0012-block-Don-t-disable-interrupts-in-trigger_softirq.patch deleted file mode 100644 index 6be945444..000000000 --- a/kernel/patches-5.4.x-rt/0012-block-Don-t-disable-interrupts-in-trigger_softirq.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 15 Nov 2019 21:37:22 +0100 -Subject: [PATCH] block: Don't disable interrupts in trigger_softirq() - -trigger_softirq() is always invoked as a SMP-function call which is -always invoked with disables interrupts. - -Don't disable interrupt in trigger_softirq() because interrupts are -already disabled. - -Signed-off-by: Sebastian Andrzej Siewior ---- - block/blk-softirq.c | 4 ---- - 1 file changed, 4 deletions(-) - ---- a/block/blk-softirq.c -+++ b/block/blk-softirq.c -@@ -42,17 +42,13 @@ static __latent_entropy void blk_done_so - static void trigger_softirq(void *data) - { - struct request *rq = data; -- unsigned long flags; - struct list_head *list; - -- local_irq_save(flags); - list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&rq->ipi_list, list); - - if (list->next == &rq->ipi_list) - raise_softirq_irqoff(BLOCK_SOFTIRQ); -- -- local_irq_restore(flags); - } - - /* diff --git a/kernel/patches-5.4.x-rt/0013-arm64-KVM-compute_layout-before-altenates-are-applie.patch b/kernel/patches-5.4.x-rt/0013-arm64-KVM-compute_layout-before-altenates-are-applie.patch deleted file mode 100644 index 3e0b433de..000000000 --- a/kernel/patches-5.4.x-rt/0013-arm64-KVM-compute_layout-before-altenates-are-applie.patch +++ /dev/null @@ -1,89 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 26 Jul 2018 09:13:42 +0200 -Subject: [PATCH] arm64: KVM: Invoke compute_layout() before alternatives are - applied - -compute_layout() is invoked as part of an alternative fixup under -stop_machine(). This function invokes get_random_long() which acquires a -sleeping lock on -RT which can not be acquired in this context. - -Rename compute_layout() to kvm_compute_layout() and invoke it before -stop_machine() applies the alternatives. Add a __init prefix to -kvm_compute_layout() because the caller has it, too (and so the code can be -discarded after boot). 
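Call-flow sketch of the fix (condensed from the diff below):

    /* arch/arm64/kernel/smp.c: runs before alternatives are applied */
    static void __init hyp_mode_check(void)
    {
            ...
            if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
                    kvm_compute_layout();   /* may use get_random_long() */
    }

kvm_update_va_mask() and kvm_patch_vector_branch() then merely consume the
precomputed layout instead of calling compute_layout() from within
stop_machine().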
- -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/arm64/include/asm/kvm_mmu.h | 1 + - arch/arm64/kernel/smp.c | 4 ++++ - arch/arm64/kvm/va_layout.c | 8 +------- - 3 files changed, 6 insertions(+), 7 deletions(-) - ---- a/arch/arm64/include/asm/kvm_mmu.h -+++ b/arch/arm64/include/asm/kvm_mmu.h -@@ -91,6 +91,7 @@ alternative_cb_end - - void kvm_update_va_mask(struct alt_instr *alt, - __le32 *origptr, __le32 *updptr, int nr_inst); -+void kvm_compute_layout(void); - - static inline unsigned long __kern_hyp_va(unsigned long v) - { ---- a/arch/arm64/kernel/smp.c -+++ b/arch/arm64/kernel/smp.c -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -39,6 +40,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -408,6 +410,8 @@ static void __init hyp_mode_check(void) - "CPU: CPUs started in inconsistent modes"); - else - pr_info("CPU: All CPU(s) started at EL1\n"); -+ if (IS_ENABLED(CONFIG_KVM_ARM_HOST)) -+ kvm_compute_layout(); - } - - void __init smp_cpus_done(unsigned int max_cpus) ---- a/arch/arm64/kvm/va_layout.c -+++ b/arch/arm64/kvm/va_layout.c -@@ -22,7 +22,7 @@ static u8 tag_lsb; - static u64 tag_val; - static u64 va_mask; - --static void compute_layout(void) -+__init void kvm_compute_layout(void) - { - phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); - u64 hyp_va_msb; -@@ -110,9 +110,6 @@ void __init kvm_update_va_mask(struct al - - BUG_ON(nr_inst != 5); - -- if (!has_vhe() && !va_mask) -- compute_layout(); -- - for (i = 0; i < nr_inst; i++) { - u32 rd, rn, insn, oinsn; - -@@ -156,9 +153,6 @@ void kvm_patch_vector_branch(struct alt_ - return; - } - -- if (!va_mask) -- compute_layout(); -- - /* - * Compute HYP VA by using the same computation as kern_hyp_va() - */ diff --git a/kernel/patches-5.4.x-rt/0014-net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch b/kernel/patches-5.4.x-rt/0014-net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch deleted file mode 100644 index 93e5ea78d..000000000 --- a/kernel/patches-5.4.x-rt/0014-net-sched-dev_deactivate_many-use-msleep-1-instead-o.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Marc Kleine-Budde -Date: Wed, 5 Mar 2014 00:49:47 +0100 -Subject: net: sched: Use msleep() instead of yield() - -On PREEMPT_RT enabled systems the interrupt handler run as threads at prio 50 -(by default). If a high priority userspace process tries to shut down a busy -network interface it might spin in a yield loop waiting for the device to -become idle. With the interrupt thread having a lower priority than the -looping process it might never be scheduled and so result in a deadlock on UP -systems. 
- -With Magic SysRq the following backtrace can be produced: - -> test_app R running 0 174 168 0x00000000 -> [] (__schedule+0x220/0x3fc) from [] (preempt_schedule_irq+0x48/0x80) -> [] (preempt_schedule_irq+0x48/0x80) from [] (svc_preempt+0x8/0x20) -> [] (svc_preempt+0x8/0x20) from [] (local_bh_enable+0x18/0x88) -> [] (local_bh_enable+0x18/0x88) from [] (dev_deactivate_many+0x220/0x264) -> [] (dev_deactivate_many+0x220/0x264) from [] (__dev_close_many+0x64/0xd4) -> [] (__dev_close_many+0x64/0xd4) from [] (__dev_close+0x28/0x3c) -> [] (__dev_close+0x28/0x3c) from [] (__dev_change_flags+0x88/0x130) -> [] (__dev_change_flags+0x88/0x130) from [] (dev_change_flags+0x10/0x48) -> [] (dev_change_flags+0x10/0x48) from [] (do_setlink+0x370/0x7ec) -> [] (do_setlink+0x370/0x7ec) from [] (rtnl_newlink+0x2b4/0x450) -> [] (rtnl_newlink+0x2b4/0x450) from [] (rtnetlink_rcv_msg+0x158/0x1f4) -> [] (rtnetlink_rcv_msg+0x158/0x1f4) from [] (netlink_rcv_skb+0xac/0xc0) -> [] (netlink_rcv_skb+0xac/0xc0) from [] (rtnetlink_rcv+0x18/0x24) -> [] (rtnetlink_rcv+0x18/0x24) from [] (netlink_unicast+0x13c/0x198) -> [] (netlink_unicast+0x13c/0x198) from [] (netlink_sendmsg+0x264/0x2e0) -> [] (netlink_sendmsg+0x264/0x2e0) from [] (sock_sendmsg+0x78/0x98) -> [] (sock_sendmsg+0x78/0x98) from [] (___sys_sendmsg.part.25+0x268/0x278) -> [] (___sys_sendmsg.part.25+0x268/0x278) from [] (__sys_sendmsg+0x48/0x78) -> [] (__sys_sendmsg+0x48/0x78) from [] (ret_fast_syscall+0x0/0x2c) - -This patch works around the problem by replacing yield() by msleep(1), giving -the interrupt thread time to finish, similar to other changes contained in the -rt patch set. Using wait_for_completion() instead would probably be a better -solution. - - -Signed-off-by: Marc Kleine-Budde -Signed-off-by: Sebastian Andrzej Siewior ---- - net/sched/sch_generic.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/net/sched/sch_generic.c -+++ b/net/sched/sch_generic.c -@@ -1215,7 +1215,7 @@ void dev_deactivate_many(struct list_hea - /* Wait for outstanding qdisc_run calls. */ - list_for_each_entry(dev, head, close_list) { - while (some_qdisc_is_busy(dev)) -- yield(); -+ msleep(1); - /* The new qdisc is assigned at this point so we can safely - * unwind stale skb lists and qdisc statistics - */ diff --git a/kernel/patches-5.4.x-rt/0015-mm-vmalloc-remove-preempt_disable-enable-when-doing-.patch b/kernel/patches-5.4.x-rt/0015-mm-vmalloc-remove-preempt_disable-enable-when-doing-.patch deleted file mode 100644 index 53d97653c..000000000 --- a/kernel/patches-5.4.x-rt/0015-mm-vmalloc-remove-preempt_disable-enable-when-doing-.patch +++ /dev/null @@ -1,105 +0,0 @@ -From: "Uladzislau Rezki (Sony)" -Date: Sat, 30 Nov 2019 17:54:33 -0800 -Subject: [PATCH] mm/vmalloc: remove preempt_disable/enable when doing - preloading - -Some background. The preemption was disabled before to guarantee that a -preloaded object is available for a CPU, it was stored for. That was -achieved by combining the disabling the preemption and taking the spin -lock while the ne_fit_preload_node is checked. - -The aim was to not allocate in atomic context when spinlock is taken -later, for regular vmap allocations. But that approach conflicts with -CONFIG_PREEMPT_RT philosophy. It means that calling spin_lock() with -disabled preemption is forbidden in the CONFIG_PREEMPT_RT kernel. - -Therefore, get rid of preempt_disable() and preempt_enable() when the -preload is done for splitting purpose. 
As a result we do not guarantee -now that a CPU is preloaded, instead we minimize the case when it is -not, with this change, by populating the per cpu preload pointer under -the vmap_area_lock. - -This implies that at least each caller that has done the preallocation -will not fallback to an atomic allocation later. It is possible that -the preallocation would be pointless or that no preallocation is done -because of the race but the data shows that this is really rare. - -For example i run the special test case that follows the preload pattern -and path. 20 "unbind" threads run it and each does 1000000 allocations. -Only 3.5 times among 1000000 a CPU was not preloaded. So it can happen -but the number is negligible. - -[mhocko@suse.com: changelog additions] -Link: http://lkml.kernel.org/r/20191016095438.12391-1-urezki@gmail.com -Fixes: 82dd23e84be3 ("mm/vmalloc.c: preload a CPU with one object for split purpose") -Signed-off-by: Uladzislau Rezki (Sony) -Reviewed-by: Steven Rostedt (VMware) -Acked-by: Sebastian Andrzej Siewior -Acked-by: Daniel Wagner -Acked-by: Michal Hocko -Cc: Hillf Danton -Cc: Matthew Wilcox -Cc: Oleksiy Avramchenko -Cc: Peter Zijlstra -Cc: Thomas Gleixner -Signed-off-by: Andrew Morton -Signed-off-by: Linus Torvalds -Signed-off-by: Sebastian Andrzej Siewior ---- - mm/vmalloc.c | 37 ++++++++++++++++++++----------------- - 1 file changed, 20 insertions(+), 17 deletions(-) - ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -1077,31 +1077,34 @@ static struct vmap_area *alloc_vmap_area - - retry: - /* -- * Preload this CPU with one extra vmap_area object to ensure -- * that we have it available when fit type of free area is -- * NE_FIT_TYPE. -+ * Preload this CPU with one extra vmap_area object. It is used -+ * when fit type of free area is NE_FIT_TYPE. Please note, it -+ * does not guarantee that an allocation occurs on a CPU that -+ * is preloaded, instead we minimize the case when it is not. -+ * It can happen because of cpu migration, because there is a -+ * race until the below spinlock is taken. - * - * The preload is done in non-atomic context, thus it allows us - * to use more permissive allocation masks to be more stable under -- * low memory condition and high memory pressure. -+ * low memory condition and high memory pressure. In rare case, -+ * if not preloaded, GFP_NOWAIT is used. - * -- * Even if it fails we do not really care about that. Just proceed -- * as it is. "overflow" path will refill the cache we allocate from. -+ * Set "pva" to NULL here, because of "retry" path. - */ -- preempt_disable(); -- if (!__this_cpu_read(ne_fit_preload_node)) { -- preempt_enable(); -- pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node); -- preempt_disable(); -+ pva = NULL; - -- if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) { -- if (pva) -- kmem_cache_free(vmap_area_cachep, pva); -- } -- } -+ if (!this_cpu_read(ne_fit_preload_node)) -+ /* -+ * Even if it fails we do not really care about that. -+ * Just proceed as it is. If needed "overflow" path -+ * will refill the cache we allocate from. 
-+ */ -+ pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node); - - spin_lock(&vmap_area_lock); -- preempt_enable(); -+ -+ if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) -+ kmem_cache_free(vmap_area_cachep, pva); - - /* - * If an allocation fails, the "vend" address is diff --git a/kernel/patches-5.4.x-rt/0016-KVM-arm-arm64-Let-the-timer-expire-in-hardirq-contex.patch b/kernel/patches-5.4.x-rt/0016-KVM-arm-arm64-Let-the-timer-expire-in-hardirq-contex.patch deleted file mode 100644 index bc84f1c3d..000000000 --- a/kernel/patches-5.4.x-rt/0016-KVM-arm-arm64-Let-the-timer-expire-in-hardirq-contex.patch +++ /dev/null @@ -1,46 +0,0 @@ -From: Thomas Gleixner -Date: Tue, 13 Aug 2019 14:29:41 +0200 -Subject: [PATCH] KVM: arm/arm64: Let the timer expire in hardirq context - on RT - -The timers are canceled from an preempt-notifier which is invoked with -disabled preemption which is not allowed on PREEMPT_RT. -The timer callback is short so in could be invoked in hard-IRQ context -on -RT. - -Let the timer expire on hard-IRQ context even on -RT. - -Signed-off-by: Thomas Gleixner -Acked-by: Marc Zyngier -Tested-by: Julien Grall -Signed-off-by: Sebastian Andrzej Siewior ---- - virt/kvm/arm/arch_timer.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - ---- a/virt/kvm/arm/arch_timer.c -+++ b/virt/kvm/arm/arch_timer.c -@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(str - static void soft_timer_start(struct hrtimer *hrt, u64 ns) - { - hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), -- HRTIMER_MODE_ABS); -+ HRTIMER_MODE_ABS_HARD); - } - - static void soft_timer_cancel(struct hrtimer *hrt) -@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu - update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); - ptimer->cntvoff = 0; - -- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - timer->bg_timer.function = kvm_bg_timer_expire; - -- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); -+ hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); -+ hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); - vtimer->hrtimer.function = kvm_hrtimer_expire; - ptimer->hrtimer.function = kvm_hrtimer_expire; - diff --git a/kernel/patches-5.4.x-rt/0017-time-sched_clock-Expire-timer-in-hardirq-context.patch b/kernel/patches-5.4.x-rt/0017-time-sched_clock-Expire-timer-in-hardirq-context.patch deleted file mode 100644 index 75d291a1b..000000000 --- a/kernel/patches-5.4.x-rt/0017-time-sched_clock-Expire-timer-in-hardirq-context.patch +++ /dev/null @@ -1,55 +0,0 @@ -From: "Ahmed S. Darwish" -Date: Mon, 9 Mar 2020 18:15:29 +0000 -Subject: [PATCH] time/sched_clock: Expire timer in hardirq context - -To minimize latency, PREEMPT_RT kernels expires hrtimers in preemptible -softirq context by default. This can be overriden by marking the timer's -expiry with HRTIMER_MODE_HARD. - -sched_clock_timer is missing this annotation: if its callback is preempted -and the duration of the preemption exceeds the wrap around time of the -underlying clocksource, sched clock will get out of sync. - -Mark the sched_clock_timer for expiry in hard interrupt context. - -Signed-off-by: Ahmed S. 
Darwish -Signed-off-by: Thomas Gleixner -Link: https://lkml.kernel.org/r/20200309181529.26558-1-a.darwish@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/time/sched_clock.c | 9 +++++---- - 1 file changed, 5 insertions(+), 4 deletions(-) - ---- a/kernel/time/sched_clock.c -+++ b/kernel/time/sched_clock.c -@@ -207,7 +207,8 @@ sched_clock_register(u64 (*read)(void), - - if (sched_clock_timer.function != NULL) { - /* update timeout for clock wrap */ -- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, -+ HRTIMER_MODE_REL_HARD); - } - - r = rate; -@@ -251,9 +252,9 @@ void __init generic_sched_clock_init(voi - * Start the timer to keep sched_clock() properly updated and - * sets the initial epoch. - */ -- hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+ hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); - sched_clock_timer.function = sched_clock_poll; -- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD); - } - - /* -@@ -290,7 +291,7 @@ void sched_clock_resume(void) - struct clock_read_data *rd = &cd.read_data[0]; - - rd->epoch_cyc = cd.actual_read_sched_clock(); -- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD); - rd->read_sched_clock = cd.actual_read_sched_clock; - } - diff --git a/kernel/patches-5.4.x-rt/0018-0001-printk-rb-add-printk-ring-buffer-documentation.patch b/kernel/patches-5.4.x-rt/0018-0001-printk-rb-add-printk-ring-buffer-documentation.patch deleted file mode 100644 index c50c2e4ef..000000000 --- a/kernel/patches-5.4.x-rt/0018-0001-printk-rb-add-printk-ring-buffer-documentation.patch +++ /dev/null @@ -1,393 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:39 +0100 -Subject: [PATCH 01/25] printk-rb: add printk ring buffer documentation - -The full documentation file for the printk ring buffer. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - Documentation/printk-ringbuffer.txt | 377 ++++++++++++++++++++++++++++++++++++ - 1 file changed, 377 insertions(+) - create mode 100644 Documentation/printk-ringbuffer.txt - ---- /dev/null -+++ b/Documentation/printk-ringbuffer.txt -@@ -0,0 +1,377 @@ -+struct printk_ringbuffer -+------------------------ -+John Ogness -+ -+Overview -+~~~~~~~~ -+As the name suggests, this ring buffer was implemented specifically to serve -+the needs of the printk() infrastructure. The ring buffer itself is not -+specific to printk and could be used for other purposes. _However_, the -+requirements and semantics of printk are rather unique. If you intend to use -+this ring buffer for anything other than printk, you need to be very clear on -+its features, behavior, and pitfalls. -+ -+Features -+^^^^^^^^ -+The printk ring buffer has the following features: -+ -+- single global buffer -+- resides in initialized data section (available at early boot) -+- lockless readers -+- supports multiple writers -+- supports multiple non-consuming readers -+- safe from any context (including NMI) -+- groups bytes into variable length blocks (referenced by entries) -+- entries tagged with sequence numbers -+ -+Behavior -+^^^^^^^^ -+Since the printk ring buffer readers are lockless, there exists no -+synchronization between readers and writers. 
Basically writers are the tasks -+in control and may overwrite any and all committed data at any time and from -+any context. For this reason readers can miss entries if they are overwritten -+before the reader was able to access the data. The reader API implementation -+is such that reader access to entries is atomic, so there is no risk of -+readers having to deal with partial or corrupt data. Also, entries are -+tagged with sequence numbers so readers can recognize if entries were missed. -+ -+Writing to the ring buffer consists of 2 steps. First a writer must reserve -+an entry of desired size. After this step the writer has exclusive access -+to the memory region. Once the data has been written to memory, it needs to -+be committed to the ring buffer. After this step the entry has been inserted -+into the ring buffer and assigned an appropriate sequence number. -+ -+Once committed, a writer must no longer access the data directly. This is -+because the data may have been overwritten and no longer exists. If a -+writer must access the data, it should either keep a private copy before -+committing the entry or use the reader API to gain access to the data. -+ -+Because of how the data backend is implemented, entries that have been -+reserved but not yet committed act as barriers, preventing future writers -+from filling the ring buffer beyond the location of the reserved but not -+yet committed entry region. For this reason it is *important* that writers -+perform both reserve and commit as quickly as possible. Also, be aware that -+preemption and local interrupts are disabled and writing to the ring buffer -+is processor-reentrant locked during the reserve/commit window. Writers in -+NMI contexts can still preempt any other writers, but as long as these -+writers do not write a large amount of data with respect to the ring buffer -+size, this should not become an issue. -+ -+API -+~~~ -+ -+Declaration -+^^^^^^^^^^^ -+The printk ring buffer can be instantiated as a static structure: -+ -+ /* declare a static struct printk_ringbuffer */ -+ #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) -+ -+The value of szbits specifies the size of the ring buffer in bits. The -+cpulockptr field is a pointer to a prb_cpulock struct that is used to -+perform processor-reentrant spin locking for the writers. It is specified -+externally because it may be used for multiple ring buffers (or other -+code) to synchronize writers without risk of deadlock. -+ -+Here is an example of a declaration of a printk ring buffer specifying a -+32KB (2^15) ring buffer: -+ -+.... -+DECLARE_STATIC_PRINTKRB_CPULOCK(rb_cpulock); -+DECLARE_STATIC_PRINTKRB(rb, 15, &rb_cpulock); -+.... -+ -+If writers will be using multiple ring buffers and the ordering of that usage -+is not clear, the same prb_cpulock should be used for both ring buffers. -+ -+Writer API -+^^^^^^^^^^ -+The writer API consists of 2 functions. The first is to reserve an entry in -+the ring buffer, the second is to commit that data to the ring buffer. The -+reserved entry information is stored within a provided `struct prb_handle`. -+ -+ /* reserve an entry */ -+ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb, -+ unsigned int size); -+ -+ /* commit a reserved entry to the ring buffer */ -+ void prb_commit(struct prb_handle *h); -+ -+Here is an example of a function to write data to a ring buffer: -+ -+.... 
-+int write_data(struct printk_ringbuffer *rb, char *data, int size) -+{ -+ struct prb_handle h; -+ char *buf; -+ -+ buf = prb_reserve(&h, rb, size); -+ if (!buf) -+ return -1; -+ memcpy(buf, data, size); -+ prb_commit(&h); -+ -+ return 0; -+} -+.... -+ -+Pitfalls -+++++++++ -+Be aware that prb_reserve() can fail. A retry might be successful, but it -+depends entirely on whether or not the next part of the ring buffer to -+overwrite belongs to reserved but not yet committed entries of other writers. -+Writers can use the prb_inc_lost() function to allow readers to notice that a -+message was lost. -+ -+Reader API -+^^^^^^^^^^ -+The reader API utilizes a `struct prb_iterator` to track the reader's -+position in the ring buffer. -+ -+ /* declare a pre-initialized static iterator for a ring buffer */ -+ #define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr) -+ -+ /* initialize iterator for a ring buffer (if static macro NOT used) */ -+ void prb_iter_init(struct prb_iterator *iter, -+ struct printk_ringbuffer *rb, u64 *seq); -+ -+ /* make a deep copy of an iterator */ -+ void prb_iter_copy(struct prb_iterator *dest, -+ struct prb_iterator *src); -+ -+ /* non-blocking, advance to next entry (and read the data) */ -+ int prb_iter_next(struct prb_iterator *iter, char *buf, -+ int size, u64 *seq); -+ -+ /* blocking, advance to next entry (and read the data) */ -+ int prb_iter_wait_next(struct prb_iterator *iter, char *buf, -+ int size, u64 *seq); -+ -+ /* position iterator at the entry seq */ -+ int prb_iter_seek(struct prb_iterator *iter, u64 seq); -+ -+ /* read data at current position */ -+ int prb_iter_data(struct prb_iterator *iter, char *buf, -+ int size, u64 *seq); -+ -+Typically prb_iter_data() is not needed because the data can be retrieved -+directly with prb_iter_next(). -+ -+Here is an example of a non-blocking function that will read all the data in -+a ring buffer: -+ -+.... -+void read_all_data(struct printk_ringbuffer *rb, char *buf, int size) -+{ -+ struct prb_iterator iter; -+ u64 prev_seq = 0; -+ u64 seq; -+ int ret; -+ -+ prb_iter_init(&iter, rb, NULL); -+ -+ for (;;) { -+ ret = prb_iter_next(&iter, buf, size, &seq); -+ if (ret > 0) { -+ if (seq != ++prev_seq) { -+ /* "seq - prev_seq" entries missed */ -+ prev_seq = seq; -+ } -+ /* process buf here */ -+ } else if (ret == 0) { -+ /* hit the end, done */ -+ break; -+ } else if (ret < 0) { -+ /* -+ * iterator is invalid, a writer overtook us, reset the -+ * iterator and keep going, entries were missed -+ */ -+ prb_iter_init(&iter, rb, NULL); -+ } -+ } -+} -+.... -+ -+Pitfalls -+++++++++ -+The reader's iterator can become invalid at any time because the reader was -+overtaken by a writer. Typically the reader should reset the iterator back -+to the current oldest entry (which will be newer than the entry the reader -+was at) and continue, noting the number of entries that were missed. -+ -+Utility API -+^^^^^^^^^^^ -+Several functions are available as convenience for external code. 
-+
-+ /* query the size of the data buffer */
-+ int prb_buffer_size(struct printk_ringbuffer *rb);
-+
-+ /* skip a seq number to signify a lost record */
-+ void prb_inc_lost(struct printk_ringbuffer *rb);
-+
-+ /* processor-reentrant spin lock */
-+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
-+
-+ /* processor-reentrant spin unlock */
-+ void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
-+
-+Pitfalls
-+++++++++
-+Although the value returned by prb_buffer_size() does represent an absolute
-+upper bound, the amount of data that can be stored within the ring buffer
-+is actually less because of the additional storage space of a header for each
-+entry.
-+
-+The prb_lock() and prb_unlock() functions can be used to synchronize between
-+ring buffer writers and other external activities. The function of a
-+processor-reentrant spin lock is to disable preemption and local interrupts
-+and synchronize against other processors. It does *not* protect against
-+multiple contexts of a single processor, i.e. NMI.
-+
-+Implementation
-+~~~~~~~~~~~~~~
-+This section describes several of the implementation concepts and details to
-+help developers better understand the code.
-+
-+Entries
-+^^^^^^^
-+All ring buffer data is stored within a single static byte array. The reason
-+for this is to ensure that any pointers to the data (past and present) will
-+always point to valid memory. This is important because the lockless readers
-+may be preempted for long periods of time and when they resume may be working
-+with expired pointers.
-+
-+Entries are identified by start index and size. (The start index plus size
-+is the start index of the next entry.) The start index is not simply an
-+offset into the byte array, but rather a logical position (lpos) that maps
-+directly to byte array offsets.
-+
-+For example, for a byte array of 1000, an entry may have a start index
-+of 100. Another entry may have a start index of 1100. And yet another, 2100.
-+All of these entries point to the same memory region, but only the most
-+recent entry is valid. The other entries are pointing to valid memory, but
-+represent entries that have been overwritten.
-+
-+Note that due to overflowing, the most recent entry is not necessarily the one
-+with the highest lpos value. Indeed, the printk ring buffer initializes its
-+data such that an overflow happens relatively quickly in order to validate the
-+handling of this situation. The implementation assumes that an lpos (unsigned
-+long) will never completely wrap while a reader is preempted. If this were to
-+become an issue, the seq number (which never wraps) could be used to increase
-+the robustness of handling this situation.
-+
-+Buffer Wrapping
-+^^^^^^^^^^^^^^^
-+If an entry starts near the end of the byte array but would extend beyond it,
-+a special terminating entry (size = -1) is inserted into the byte array and
-+the real entry is placed at the beginning of the byte array. This can waste
-+space at the end of the byte array, but simplifies the implementation by
-+allowing writers to always work with contiguous buffers.
-+
-+Note that the size field is the first 4 bytes of the entry header. Also note
-+that calc_next() always ensures that there are at least 4 bytes left at the
-+end of the byte array to allow room for a terminating entry.
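To make the logical position concept from the two sections above concrete, here is a minimal sketch of how an lpos resolves to a byte-array offset for a power-of-2 buffer. The macro bodies mirror the PRB_* helpers added to lib/printk_ringbuffer.c later in this diff; the sample values in the comment are illustrative only.

....
/* lpos -> offset mapping for a 2^size_bits byte array */
#define PRB_SIZE(rb)         (1 << (rb)->size_bits)
#define PRB_SIZE_BITMASK(rb) (PRB_SIZE(rb) - 1)
#define PRB_INDEX(rb, lpos)  ((lpos) & PRB_SIZE_BITMASK(rb))
#define PRB_WRAPS(rb, lpos)  ((lpos) >> (rb)->size_bits)

/*
 * With size_bits = 10 (a 1024-byte array), the lpos values 100,
 * 1124 and 2148 all resolve to offset 100, while PRB_WRAPS()
 * yields 0, 1 and 2 respectively. Only the entry whose logical
 * position lies between tail and head is live; the others have
 * been overwritten.
 */
....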
-+ -+Ring Buffer Pointers -+^^^^^^^^^^^^^^^^^^^^ -+Three pointers (lpos values) are used to manage the ring buffer: -+ -+ - _tail_: points to the oldest entry -+ - _head_: points to where the next new committed entry will be -+ - _reserve_: points to where the next new reserved entry will be -+ -+These pointers always maintain a logical ordering: -+ -+ tail <= head <= reserve -+ -+The reserve pointer moves forward when a writer reserves a new entry. The -+head pointer moves forward when a writer commits a new entry. -+ -+The reserve pointer cannot overwrite the tail pointer in a wrap situation. In -+such a situation, the tail pointer must be "pushed forward", thus -+invalidating that oldest entry. Readers identify if they are accessing a -+valid entry by ensuring their entry pointer is `>= tail && < head`. -+ -+If the tail pointer is equal to the head pointer, it cannot be pushed and any -+reserve operation will fail. The only resolution is for writers to commit -+their reserved entries. -+ -+Processor-Reentrant Locking -+^^^^^^^^^^^^^^^^^^^^^^^^^^^ -+The purpose of the processor-reentrant locking is to limit the interruption -+scenarios of writers to 2 contexts. This allows for a simplified -+implementation where: -+ -+- The reserve/commit window only exists on 1 processor at a time. A reserve -+ can never fail due to uncommitted entries of other processors. -+ -+- When committing entries, it is trivial to handle the situation when -+ subsequent entries have already been committed, i.e. managing the head -+ pointer. -+ -+Performance -+~~~~~~~~~~~ -+Some basic tests were performed on a quad Intel(R) Xeon(R) CPU E5-2697 v4 at -+2.30GHz (36 cores / 72 threads). All tests involved writing a total of -+32,000,000 records at an average of 33 bytes each. Each writer was pinned to -+its own CPU and would write as fast as it could until a total of 32,000,000 -+records were written. All tests involved 2 readers that were both pinned -+together to another CPU. Each reader would read as fast as it could and track -+how many of the 32,000,000 records it could read. All tests used a ring buffer -+of 16KB in size, which holds around 350 records (header + data for each -+entry). -+ -+The only difference between the tests is the number of writers (and thus also -+the number of records per writer). As more writers are added, the time to -+write a record increases. This is because data pointers, modified via cmpxchg, -+and global data access in general become more contended. -+ -+1 writer -+^^^^^^^^ -+ runtime: 0m 18s -+ reader1: 16219900/32000000 (50%) records -+ reader2: 16141582/32000000 (50%) records -+ -+2 writers -+^^^^^^^^^ -+ runtime: 0m 32s -+ reader1: 16327957/32000000 (51%) records -+ reader2: 16313988/32000000 (50%) records -+ -+4 writers -+^^^^^^^^^ -+ runtime: 0m 42s -+ reader1: 16421642/32000000 (51%) records -+ reader2: 16417224/32000000 (51%) records -+ -+8 writers -+^^^^^^^^^ -+ runtime: 0m 43s -+ reader1: 16418300/32000000 (51%) records -+ reader2: 16432222/32000000 (51%) records -+ -+16 writers -+^^^^^^^^^^ -+ runtime: 0m 54s -+ reader1: 16539189/32000000 (51%) records -+ reader2: 16542711/32000000 (51%) records -+ -+32 writers -+^^^^^^^^^^ -+ runtime: 1m 13s -+ reader1: 16731808/32000000 (52%) records -+ reader2: 16735119/32000000 (52%) records -+ -+Comments -+^^^^^^^^ -+It is particularly interesting to compare/contrast the 1-writer and 32-writer -+tests. 
Despite the writing of the 32,000,000 records taking over 4 times
-+longer, the readers (which perform no cmpxchg) were still unable to keep up.
-+This shows that the memory contention between the increasing number of CPUs
-+also has a dramatic effect on readers.
-+
-+It should also be noted that in all cases each reader was able to read >=50%
-+of the records. This means that a single reader would have been able to keep
-+up with the writer(s) in all cases, becoming slightly easier as more writers
-+are added. This was the purpose of pinning 2 readers to 1 CPU: to observe how
-+maximum reader performance changes.
diff --git a/kernel/patches-5.4.x-rt/0019-0002-printk-rb-add-prb-locking-functions.patch b/kernel/patches-5.4.x-rt/0019-0002-printk-rb-add-prb-locking-functions.patch
deleted file mode 100644
index 29e2b17ff..000000000
--- a/kernel/patches-5.4.x-rt/0019-0002-printk-rb-add-prb-locking-functions.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From: John Ogness
-Date: Tue, 12 Feb 2019 15:29:40 +0100
-Subject: [PATCH 02/25] printk-rb: add prb locking functions
-
-Add processor-reentrant spin locking functions. These allow
-restricting the number of possible contexts to 2, which can simplify
-implementing code that also supports NMI interruptions.
-
- prb_lock();
-
- /*
- * This code is synchronized with all contexts
- * except an NMI on the same processor.
- */
-
- prb_unlock();
-
-In order to support printk's emergency messages, a
-processor-reentrant spin lock will be used to control raw access to
-the emergency console. However, it must be the same
-processor-reentrant spin lock as the one used by the ring buffer,
-otherwise a deadlock can occur:
-
- CPU1: printk lock -> emergency -> serial lock
- CPU2: serial lock -> printk lock
-
-By making the processor-reentrant implementation available externally,
-printk can use the same atomic_t for the ring buffer as for the
-emergency console and thus avoid the above deadlock.
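To illustrate the shared-lock idea, here is a hypothetical sketch (not part of the patch) of an emergency write path that takes the same prb_cpulock as the ring buffer writers, so neither lock ordering from the scenario above can arise:

....
static void emergency_write(struct prb_cpulock *cpu_lock,
			    struct console *con,
			    const char *msg, unsigned int len)
{
	unsigned int cpu_store;

	prb_lock(cpu_lock, &cpu_store);
	/*
	 * Raw access to the emergency console, serialized against
	 * ring buffer writers on all other processors.
	 */
	con->write(con, msg, len);
	prb_unlock(cpu_lock, cpu_store);
}
....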
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/printk_ringbuffer.h | 24 +++++++++++ - lib/Makefile | 2 - lib/printk_ringbuffer.c | 77 ++++++++++++++++++++++++++++++++++++++ - 3 files changed, 102 insertions(+), 1 deletion(-) - create mode 100644 include/linux/printk_ringbuffer.h - create mode 100644 lib/printk_ringbuffer.c - ---- /dev/null -+++ b/include/linux/printk_ringbuffer.h -@@ -0,0 +1,24 @@ -+/* SPDX-License-Identifier: GPL-2.0 */ -+#ifndef _LINUX_PRINTK_RINGBUFFER_H -+#define _LINUX_PRINTK_RINGBUFFER_H -+ -+#include -+#include -+ -+struct prb_cpulock { -+ atomic_t owner; -+ unsigned long __percpu *irqflags; -+}; -+ -+#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \ -+static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \ -+static struct prb_cpulock name = { \ -+ .owner = ATOMIC_INIT(-1), \ -+ .irqflags = &_##name##_percpu_irqflags, \ -+} -+ -+/* utility functions */ -+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store); -+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store); -+ -+#endif /*_LINUX_PRINTK_RINGBUFFER_H */ ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -26,7 +26,7 @@ endif - - lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o timerqueue.o xarray.o \ -- idr.o extable.o \ -+ idr.o extable.o printk_ringbuffer.o \ - sha1.o chacha.o irq_regs.o argv_split.o \ - flex_proportions.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o kobject_uevent.o \ ---- /dev/null -+++ b/lib/printk_ringbuffer.c -@@ -0,0 +1,77 @@ -+// SPDX-License-Identifier: GPL-2.0 -+#include -+#include -+ -+static bool __prb_trylock(struct prb_cpulock *cpu_lock, -+ unsigned int *cpu_store) -+{ -+ unsigned long *flags; -+ unsigned int cpu; -+ -+ cpu = get_cpu(); -+ -+ *cpu_store = atomic_read(&cpu_lock->owner); -+ /* memory barrier to ensure the current lock owner is visible */ -+ smp_rmb(); -+ if (*cpu_store == -1) { -+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu); -+ local_irq_save(*flags); -+ if (atomic_try_cmpxchg_acquire(&cpu_lock->owner, -+ cpu_store, cpu)) { -+ return true; -+ } -+ local_irq_restore(*flags); -+ } else if (*cpu_store == cpu) { -+ return true; -+ } -+ -+ put_cpu(); -+ return false; -+} -+ -+/* -+ * prb_lock: Perform a processor-reentrant spin lock. -+ * @cpu_lock: A pointer to the lock object. -+ * @cpu_store: A "flags" pointer to store lock status information. -+ * -+ * If no processor has the lock, the calling processor takes the lock and -+ * becomes the owner. If the calling processor is already the owner of the -+ * lock, this function succeeds immediately. If lock is locked by another -+ * processor, this function spins until the calling processor becomes the -+ * owner. -+ * -+ * It is safe to call this function from any context and state. -+ */ -+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store) -+{ -+ for (;;) { -+ if (__prb_trylock(cpu_lock, cpu_store)) -+ break; -+ cpu_relax(); -+ } -+} -+ -+/* -+ * prb_unlock: Perform a processor-reentrant spin unlock. -+ * @cpu_lock: A pointer to the lock object. -+ * @cpu_store: A "flags" object storing lock status information. -+ * -+ * Release the lock. The calling processor must be the owner of the lock. -+ * -+ * It is safe to call this function from any context and state. 
-+ */ -+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store) -+{ -+ unsigned long *flags; -+ unsigned int cpu; -+ -+ cpu = atomic_read(&cpu_lock->owner); -+ atomic_set_release(&cpu_lock->owner, cpu_store); -+ -+ if (cpu_store == -1) { -+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu); -+ local_irq_restore(*flags); -+ } -+ -+ put_cpu(); -+} diff --git a/kernel/patches-5.4.x-rt/0020-0003-printk-rb-define-ring-buffer-struct-and-initializer.patch b/kernel/patches-5.4.x-rt/0020-0003-printk-rb-define-ring-buffer-struct-and-initializer.patch deleted file mode 100644 index 9080713b1..000000000 --- a/kernel/patches-5.4.x-rt/0020-0003-printk-rb-define-ring-buffer-struct-and-initializer.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:41 +0100 -Subject: [PATCH 03/25] printk-rb: define ring buffer struct and initializer - -See Documentation/printk-ringbuffer.txt for details about the -initializer arguments. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/printk_ringbuffer.h | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) - ---- a/include/linux/printk_ringbuffer.h -+++ b/include/linux/printk_ringbuffer.h -@@ -10,6 +10,20 @@ struct prb_cpulock { - unsigned long __percpu *irqflags; - }; - -+struct printk_ringbuffer { -+ void *buffer; -+ unsigned int size_bits; -+ -+ u64 seq; -+ -+ atomic_long_t tail; -+ atomic_long_t head; -+ atomic_long_t reserve; -+ -+ struct prb_cpulock *cpulock; -+ atomic_t ctx; -+}; -+ - #define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \ - static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \ - static struct prb_cpulock name = { \ -@@ -17,6 +31,20 @@ static struct prb_cpulock name = { \ - .irqflags = &_##name##_percpu_irqflags, \ - } - -+#define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \ -+static char _##name##_buffer[1 << (szbits)] \ -+ __aligned(__alignof__(long)); \ -+static struct printk_ringbuffer name = { \ -+ .buffer = &_##name##_buffer[0], \ -+ .size_bits = szbits, \ -+ .seq = 0, \ -+ .tail = ATOMIC_LONG_INIT(-111 * sizeof(long)), \ -+ .head = ATOMIC_LONG_INIT(-111 * sizeof(long)), \ -+ .reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \ -+ .cpulock = cpulockptr, \ -+ .ctx = ATOMIC_INIT(0), \ -+} -+ - /* utility functions */ - void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store); - void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store); diff --git a/kernel/patches-5.4.x-rt/0021-0004-printk-rb-add-writer-interface.patch b/kernel/patches-5.4.x-rt/0021-0004-printk-rb-add-writer-interface.patch deleted file mode 100644 index e5f29a10e..000000000 --- a/kernel/patches-5.4.x-rt/0021-0004-printk-rb-add-writer-interface.patch +++ /dev/null @@ -1,233 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:42 +0100 -Subject: [PATCH 04/25] printk-rb: add writer interface - -Add the writer functions prb_reserve() and prb_commit(). These make -use of processor-reentrant spin locks to limit the number of possible -interruption scenarios for the writers. 
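The calling pattern this interface expects is sketched below (a hypothetical caller; prb_inc_lost() only appears later in the series, in patch 07/25):

....
static int write_record(struct printk_ringbuffer *rb,
			const char *data, unsigned int size)
{
	struct prb_handle h;
	char *buf;

	buf = prb_reserve(&h, rb, size);
	if (!buf) {
		/* note the loss so readers see a gap in seq numbers */
		prb_inc_lost(rb);
		return -ENOMEM;
	}
	memcpy(buf, data, size);
	/* commit quickly: an uncommitted entry blocks other writers */
	prb_commit(&h);
	return 0;
}
....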
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/printk_ringbuffer.h | 17 +++ - lib/printk_ringbuffer.c | 172 ++++++++++++++++++++++++++++++++++++++ - 2 files changed, 189 insertions(+) - ---- a/include/linux/printk_ringbuffer.h -+++ b/include/linux/printk_ringbuffer.h -@@ -24,6 +24,18 @@ struct printk_ringbuffer { - atomic_t ctx; - }; - -+struct prb_entry { -+ unsigned int size; -+ u64 seq; -+ char data[0]; -+}; -+ -+struct prb_handle { -+ struct printk_ringbuffer *rb; -+ unsigned int cpu; -+ struct prb_entry *entry; -+}; -+ - #define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \ - static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \ - static struct prb_cpulock name = { \ -@@ -45,6 +57,11 @@ static struct printk_ringbuffer name = { - .ctx = ATOMIC_INIT(0), \ - } - -+/* writer interface */ -+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb, -+ unsigned int size); -+void prb_commit(struct prb_handle *h); -+ - /* utility functions */ - void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store); - void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store); ---- a/lib/printk_ringbuffer.c -+++ b/lib/printk_ringbuffer.c -@@ -2,6 +2,14 @@ - #include - #include - -+#define PRB_SIZE(rb) (1 << rb->size_bits) -+#define PRB_SIZE_BITMASK(rb) (PRB_SIZE(rb) - 1) -+#define PRB_INDEX(rb, lpos) (lpos & PRB_SIZE_BITMASK(rb)) -+#define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits) -+#define PRB_WRAP_LPOS(rb, lpos, xtra) \ -+ ((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits) -+#define PRB_DATA_ALIGN sizeof(long) -+ - static bool __prb_trylock(struct prb_cpulock *cpu_lock, - unsigned int *cpu_store) - { -@@ -75,3 +83,167 @@ void prb_unlock(struct prb_cpulock *cpu_ - - put_cpu(); - } -+ -+static struct prb_entry *to_entry(struct printk_ringbuffer *rb, -+ unsigned long lpos) -+{ -+ char *buffer = rb->buffer; -+ buffer += PRB_INDEX(rb, lpos); -+ return (struct prb_entry *)buffer; -+} -+ -+static int calc_next(struct printk_ringbuffer *rb, unsigned long tail, -+ unsigned long lpos, int size, unsigned long *calced_next) -+{ -+ unsigned long next_lpos; -+ int ret = 0; -+again: -+ next_lpos = lpos + size; -+ if (next_lpos - tail > PRB_SIZE(rb)) -+ return -1; -+ -+ if (PRB_WRAPS(rb, lpos) != PRB_WRAPS(rb, next_lpos)) { -+ lpos = PRB_WRAP_LPOS(rb, next_lpos, 0); -+ ret |= 1; -+ goto again; -+ } -+ -+ *calced_next = next_lpos; -+ return ret; -+} -+ -+static bool push_tail(struct printk_ringbuffer *rb, unsigned long tail) -+{ -+ unsigned long new_tail; -+ struct prb_entry *e; -+ unsigned long head; -+ -+ if (tail != atomic_long_read(&rb->tail)) -+ return true; -+ -+ e = to_entry(rb, tail); -+ if (e->size != -1) -+ new_tail = tail + e->size; -+ else -+ new_tail = PRB_WRAP_LPOS(rb, tail, 1); -+ -+ /* make sure the new tail does not overtake the head */ -+ head = atomic_long_read(&rb->head); -+ if (head - new_tail > PRB_SIZE(rb)) -+ return false; -+ -+ atomic_long_cmpxchg(&rb->tail, tail, new_tail); -+ return true; -+} -+ -+/* -+ * prb_commit: Commit a reserved entry to the ring buffer. -+ * @h: An entry handle referencing the data entry to commit. -+ * -+ * Commit data that has been reserved using prb_reserve(). Once the data -+ * block has been committed, it can be invalidated at any time. If a writer -+ * is interested in using the data after committing, the writer should make -+ * its own copy first or use the prb_iter_ reader functions to access the -+ * data in the ring buffer. 
-+ * -+ * It is safe to call this function from any context and state. -+ */ -+void prb_commit(struct prb_handle *h) -+{ -+ struct printk_ringbuffer *rb = h->rb; -+ struct prb_entry *e; -+ unsigned long head; -+ unsigned long res; -+ -+ for (;;) { -+ if (atomic_read(&rb->ctx) != 1) { -+ /* the interrupted context will fixup head */ -+ atomic_dec(&rb->ctx); -+ break; -+ } -+ /* assign sequence numbers before moving head */ -+ head = atomic_long_read(&rb->head); -+ res = atomic_long_read(&rb->reserve); -+ while (head != res) { -+ e = to_entry(rb, head); -+ if (e->size == -1) { -+ head = PRB_WRAP_LPOS(rb, head, 1); -+ continue; -+ } -+ e->seq = ++rb->seq; -+ head += e->size; -+ } -+ atomic_long_set_release(&rb->head, res); -+ atomic_dec(&rb->ctx); -+ -+ if (atomic_long_read(&rb->reserve) == res) -+ break; -+ atomic_inc(&rb->ctx); -+ } -+ -+ prb_unlock(rb->cpulock, h->cpu); -+} -+ -+/* -+ * prb_reserve: Reserve an entry within a ring buffer. -+ * @h: An entry handle to be setup and reference an entry. -+ * @rb: A ring buffer to reserve data within. -+ * @size: The number of bytes to reserve. -+ * -+ * Reserve an entry of at least @size bytes to be used by the caller. If -+ * successful, the data region of the entry belongs to the caller and cannot -+ * be invalidated by any other task/context. For this reason, the caller -+ * should call prb_commit() as quickly as possible in order to avoid preventing -+ * other tasks/contexts from reserving data in the case that the ring buffer -+ * has wrapped. -+ * -+ * It is safe to call this function from any context and state. -+ * -+ * Returns a pointer to the reserved entry (and @h is setup to reference that -+ * entry) or NULL if it was not possible to reserve data. -+ */ -+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb, -+ unsigned int size) -+{ -+ unsigned long tail, res1, res2; -+ int ret; -+ -+ if (size == 0) -+ return NULL; -+ size += sizeof(struct prb_entry); -+ size += PRB_DATA_ALIGN - 1; -+ size &= ~(PRB_DATA_ALIGN - 1); -+ if (size >= PRB_SIZE(rb)) -+ return NULL; -+ -+ h->rb = rb; -+ prb_lock(rb->cpulock, &h->cpu); -+ -+ atomic_inc(&rb->ctx); -+ -+ do { -+ for (;;) { -+ tail = atomic_long_read(&rb->tail); -+ res1 = atomic_long_read(&rb->reserve); -+ ret = calc_next(rb, tail, res1, size, &res2); -+ if (ret >= 0) -+ break; -+ if (!push_tail(rb, tail)) { -+ prb_commit(h); -+ return NULL; -+ } -+ } -+ } while (!atomic_long_try_cmpxchg_acquire(&rb->reserve, &res1, res2)); -+ -+ h->entry = to_entry(rb, res1); -+ -+ if (ret) { -+ /* handle wrap */ -+ h->entry->size = -1; -+ h->entry = to_entry(rb, PRB_WRAP_LPOS(rb, res2, 0)); -+ } -+ -+ h->entry->size = size; -+ -+ return &h->entry->data[0]; -+} diff --git a/kernel/patches-5.4.x-rt/0022-0005-printk-rb-add-basic-non-blocking-reading-interface.patch b/kernel/patches-5.4.x-rt/0022-0005-printk-rb-add-basic-non-blocking-reading-interface.patch deleted file mode 100644 index e583c1932..000000000 --- a/kernel/patches-5.4.x-rt/0022-0005-printk-rb-add-basic-non-blocking-reading-interface.patch +++ /dev/null @@ -1,259 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:43 +0100 -Subject: [PATCH 05/25] printk-rb: add basic non-blocking reading interface - -Add reader iterator static declaration/initializer, dynamic -initializer, and functions to iterate and retrieve ring buffer data. 
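One pattern these primitives make possible is a non-destructive peek, sketched below as a hypothetical helper: because prb_iter_copy() makes a deep copy, a throwaway copy can be advanced without disturbing the caller's position.

....
static int peek_next(struct prb_iterator *iter, char *buf,
		     int size, u64 *seq)
{
	struct prb_iterator tmp;

	/* advance a copy; the caller's iterator stays put */
	prb_iter_copy(&tmp, iter);
	return prb_iter_next(&tmp, buf, size, seq);
}
....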
-
-Signed-off-by: John Ogness
-Signed-off-by: Sebastian Andrzej Siewior
----
- include/linux/printk_ringbuffer.h | 20 ++++
- lib/printk_ringbuffer.c | 190 ++++++++++++++++++++++++++++++++++++++
- 2 files changed, 210 insertions(+)
-
---- a/include/linux/printk_ringbuffer.h
-+++ b/include/linux/printk_ringbuffer.h
-@@ -43,6 +43,19 @@ static struct prb_cpulock name = { \
- .irqflags = &_##name##_percpu_irqflags, \
- }
-
-+#define PRB_INIT ((unsigned long)-1)
-+
-+#define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr) \
-+static struct prb_iterator name = { \
-+ .rb = rbaddr, \
-+ .lpos = PRB_INIT, \
-+}
-+
-+struct prb_iterator {
-+ struct printk_ringbuffer *rb;
-+ unsigned long lpos;
-+};
-+
- #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \
- static char _##name##_buffer[1 << (szbits)] \
- __aligned(__alignof__(long)); \
-@@ -62,6 +75,13 @@ char *prb_reserve(struct prb_handle *h,
- unsigned int size);
- void prb_commit(struct prb_handle *h);
-
-+/* reader interface */
-+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
-+ u64 *seq);
-+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
-+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
-+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
-+
- /* utility functions */
- void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
- void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
---- a/lib/printk_ringbuffer.c
-+++ b/lib/printk_ringbuffer.c
-@@ -1,5 +1,7 @@
- // SPDX-License-Identifier: GPL-2.0
- #include
-+#include
-+#include
- #include
-
- #define PRB_SIZE(rb) (1 << rb->size_bits)
-@@ -8,6 +10,7 @@
- #define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits)
- #define PRB_WRAP_LPOS(rb, lpos, xtra) \
- ((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits)
-+#define PRB_DATA_SIZE(e) (e->size - sizeof(struct prb_entry))
- #define PRB_DATA_ALIGN sizeof(long)
-
- static bool __prb_trylock(struct prb_cpulock *cpu_lock,
-@@ -247,3 +250,190 @@ char *prb_reserve(struct prb_handle *h,
-
- return &h->entry->data[0];
- }
-+
-+/*
-+ * prb_iter_copy: Copy an iterator.
-+ * @dest: The iterator to copy to.
-+ * @src: The iterator to copy from.
-+ *
-+ * Make a deep copy of an iterator. This is particularly useful for making
-+ * backup copies of an iterator in case a form of rewinding is needed.
-+ *
-+ * It is safe to call this function from any context and state. But
-+ * note that this function is not atomic. Callers should not make copies
-+ * to/from iterators that can be accessed by other tasks/contexts.
-+ */
-+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src)
-+{
-+ memcpy(dest, src, sizeof(*dest));
-+}
-+
-+/*
-+ * prb_iter_init: Initialize an iterator for a ring buffer.
-+ * @iter: The iterator to initialize.
-+ * @rb: The ring buffer that @iter should iterate.
-+ * @seq: The sequence number of the position preceding the first record.
-+ * May be NULL.
-+ *
-+ * Initialize an iterator to be used with a specified ring buffer. If @seq
-+ * is non-NULL, it will be set such that prb_iter_next() will provide a
-+ * sequence value of "@seq + 1" if no records were missed.
-+ *
-+ * It is safe to call this function from any context and state.
-+ */ -+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb, -+ u64 *seq) -+{ -+ memset(iter, 0, sizeof(*iter)); -+ iter->rb = rb; -+ iter->lpos = PRB_INIT; -+ -+ if (!seq) -+ return; -+ -+ for (;;) { -+ struct prb_iterator tmp_iter; -+ int ret; -+ -+ prb_iter_copy(&tmp_iter, iter); -+ -+ ret = prb_iter_next(&tmp_iter, NULL, 0, seq); -+ if (ret < 0) -+ continue; -+ -+ if (ret == 0) -+ *seq = 0; -+ else -+ (*seq)--; -+ break; -+ } -+} -+ -+static bool is_valid(struct printk_ringbuffer *rb, unsigned long lpos) -+{ -+ unsigned long head, tail; -+ -+ tail = atomic_long_read(&rb->tail); -+ head = atomic_long_read(&rb->head); -+ head -= tail; -+ lpos -= tail; -+ -+ if (lpos >= head) -+ return false; -+ return true; -+} -+ -+/* -+ * prb_iter_data: Retrieve the record data at the current position. -+ * @iter: Iterator tracking the current position. -+ * @buf: A buffer to store the data of the record. May be NULL. -+ * @size: The size of @buf. (Ignored if @buf is NULL.) -+ * @seq: The sequence number of the record. May be NULL. -+ * -+ * If @iter is at a record, provide the data and/or sequence number of that -+ * record (if specified by the caller). -+ * -+ * It is safe to call this function from any context and state. -+ * -+ * Returns >=0 if the current record contains valid data (returns 0 if @buf -+ * is NULL or returns the size of the data block if @buf is non-NULL) or -+ * -EINVAL if @iter is now invalid. -+ */ -+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq) -+{ -+ struct printk_ringbuffer *rb = iter->rb; -+ unsigned long lpos = iter->lpos; -+ unsigned int datsize = 0; -+ struct prb_entry *e; -+ -+ if (buf || seq) { -+ e = to_entry(rb, lpos); -+ if (!is_valid(rb, lpos)) -+ return -EINVAL; -+ /* memory barrier to ensure valid lpos */ -+ smp_rmb(); -+ if (buf) { -+ datsize = PRB_DATA_SIZE(e); -+ /* memory barrier to ensure load of datsize */ -+ smp_rmb(); -+ if (!is_valid(rb, lpos)) -+ return -EINVAL; -+ if (PRB_INDEX(rb, lpos) + datsize > -+ PRB_SIZE(rb) - PRB_DATA_ALIGN) { -+ return -EINVAL; -+ } -+ if (size > datsize) -+ size = datsize; -+ memcpy(buf, &e->data[0], size); -+ } -+ if (seq) -+ *seq = e->seq; -+ /* memory barrier to ensure loads of entry data */ -+ smp_rmb(); -+ } -+ -+ if (!is_valid(rb, lpos)) -+ return -EINVAL; -+ -+ return datsize; -+} -+ -+/* -+ * prb_iter_next: Advance to the next record. -+ * @iter: Iterator tracking the current position. -+ * @buf: A buffer to store the data of the next record. May be NULL. -+ * @size: The size of @buf. (Ignored if @buf is NULL.) -+ * @seq: The sequence number of the next record. May be NULL. -+ * -+ * If a next record is available, @iter is advanced and (if specified) -+ * the data and/or sequence number of that record are provided. -+ * -+ * It is safe to call this function from any context and state. -+ * -+ * Returns 1 if @iter was advanced, 0 if @iter is at the end of the list, or -+ * -EINVAL if @iter is now invalid. 
-+ */ -+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq) -+{ -+ struct printk_ringbuffer *rb = iter->rb; -+ unsigned long next_lpos; -+ struct prb_entry *e; -+ unsigned int esize; -+ -+ if (iter->lpos == PRB_INIT) { -+ next_lpos = atomic_long_read(&rb->tail); -+ } else { -+ if (!is_valid(rb, iter->lpos)) -+ return -EINVAL; -+ /* memory barrier to ensure valid lpos */ -+ smp_rmb(); -+ e = to_entry(rb, iter->lpos); -+ esize = e->size; -+ /* memory barrier to ensure load of size */ -+ smp_rmb(); -+ if (!is_valid(rb, iter->lpos)) -+ return -EINVAL; -+ next_lpos = iter->lpos + esize; -+ } -+ if (next_lpos == atomic_long_read(&rb->head)) -+ return 0; -+ if (!is_valid(rb, next_lpos)) -+ return -EINVAL; -+ /* memory barrier to ensure valid lpos */ -+ smp_rmb(); -+ -+ iter->lpos = next_lpos; -+ e = to_entry(rb, iter->lpos); -+ esize = e->size; -+ /* memory barrier to ensure load of size */ -+ smp_rmb(); -+ if (!is_valid(rb, iter->lpos)) -+ return -EINVAL; -+ if (esize == -1) -+ iter->lpos = PRB_WRAP_LPOS(rb, iter->lpos, 1); -+ -+ if (prb_iter_data(iter, buf, size, seq) < 0) -+ return -EINVAL; -+ -+ return 1; -+} diff --git a/kernel/patches-5.4.x-rt/0023-0006-printk-rb-add-blocking-reader-support.patch b/kernel/patches-5.4.x-rt/0023-0006-printk-rb-add-blocking-reader-support.patch deleted file mode 100644 index 5988d20f4..000000000 --- a/kernel/patches-5.4.x-rt/0023-0006-printk-rb-add-blocking-reader-support.patch +++ /dev/null @@ -1,161 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:44 +0100 -Subject: [PATCH 06/25] printk-rb: add blocking reader support - -Add a blocking read function for readers. An irq_work function is -used to signal the wait queue so that write notification can -be triggered from any context. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/printk_ringbuffer.h | 20 +++++++++++++ - lib/printk_ringbuffer.c | 55 ++++++++++++++++++++++++++++++++++++++ - 2 files changed, 75 insertions(+) - ---- a/include/linux/printk_ringbuffer.h -+++ b/include/linux/printk_ringbuffer.h -@@ -2,8 +2,10 @@ - #ifndef _LINUX_PRINTK_RINGBUFFER_H - #define _LINUX_PRINTK_RINGBUFFER_H - -+#include - #include - #include -+#include - - struct prb_cpulock { - atomic_t owner; -@@ -22,6 +24,10 @@ struct printk_ringbuffer { - - struct prb_cpulock *cpulock; - atomic_t ctx; -+ -+ struct wait_queue_head *wq; -+ atomic_long_t wq_counter; -+ struct irq_work *wq_work; - }; - - struct prb_entry { -@@ -59,6 +65,15 @@ struct prb_iterator { - #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \ - static char _##name##_buffer[1 << (szbits)] \ - __aligned(__alignof__(long)); \ -+static DECLARE_WAIT_QUEUE_HEAD(_##name##_wait); \ -+static void _##name##_wake_work_func(struct irq_work *irq_work) \ -+{ \ -+ wake_up_interruptible_all(&_##name##_wait); \ -+} \ -+static struct irq_work _##name##_wake_work = { \ -+ .func = _##name##_wake_work_func, \ -+ .flags = IRQ_WORK_LAZY, \ -+}; \ - static struct printk_ringbuffer name = { \ - .buffer = &_##name##_buffer[0], \ - .size_bits = szbits, \ -@@ -68,6 +83,9 @@ static struct printk_ringbuffer name = { - .reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \ - .cpulock = cpulockptr, \ - .ctx = ATOMIC_INIT(0), \ -+ .wq = &_##name##_wait, \ -+ .wq_counter = ATOMIC_LONG_INIT(0), \ -+ .wq_work = &_##name##_wake_work, \ - } - - /* writer interface */ -@@ -80,6 +98,8 @@ void prb_iter_init(struct prb_iterator * - u64 *seq); - void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src); - int 
prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq); -+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size, -+ u64 *seq); - int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq); - - /* utility functions */ ---- a/lib/printk_ringbuffer.c -+++ b/lib/printk_ringbuffer.c -@@ -1,4 +1,5 @@ - // SPDX-License-Identifier: GPL-2.0 -+#include - #include - #include - #include -@@ -154,6 +155,7 @@ static bool push_tail(struct printk_ring - void prb_commit(struct prb_handle *h) - { - struct printk_ringbuffer *rb = h->rb; -+ bool changed = false; - struct prb_entry *e; - unsigned long head; - unsigned long res; -@@ -175,6 +177,7 @@ void prb_commit(struct prb_handle *h) - } - e->seq = ++rb->seq; - head += e->size; -+ changed = true; - } - atomic_long_set_release(&rb->head, res); - atomic_dec(&rb->ctx); -@@ -185,6 +188,18 @@ void prb_commit(struct prb_handle *h) - } - - prb_unlock(rb->cpulock, h->cpu); -+ -+ if (changed) { -+ atomic_long_inc(&rb->wq_counter); -+ if (wq_has_sleeper(rb->wq)) { -+#ifdef CONFIG_IRQ_WORK -+ irq_work_queue(rb->wq_work); -+#else -+ if (!in_nmi()) -+ wake_up_interruptible_all(rb->wq); -+#endif -+ } -+ } - } - - /* -@@ -437,3 +452,43 @@ int prb_iter_next(struct prb_iterator *i - - return 1; - } -+ -+/* -+ * prb_iter_wait_next: Advance to the next record, blocking if none available. -+ * @iter: Iterator tracking the current position. -+ * @buf: A buffer to store the data of the next record. May be NULL. -+ * @size: The size of @buf. (Ignored if @buf is NULL.) -+ * @seq: The sequence number of the next record. May be NULL. -+ * -+ * If a next record is already available, this function works like -+ * prb_iter_next(). Otherwise block interruptible until a next record is -+ * available. -+ * -+ * When a next record is available, @iter is advanced and (if specified) -+ * the data and/or sequence number of that record are provided. -+ * -+ * This function might sleep. -+ * -+ * Returns 1 if @iter was advanced, -EINVAL if @iter is now invalid, or -+ * -ERESTARTSYS if interrupted by a signal. -+ */ -+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size, u64 *seq) -+{ -+ unsigned long last_seen; -+ int ret; -+ -+ for (;;) { -+ last_seen = atomic_long_read(&iter->rb->wq_counter); -+ -+ ret = prb_iter_next(iter, buf, size, seq); -+ if (ret != 0) -+ break; -+ -+ ret = wait_event_interruptible(*iter->rb->wq, -+ last_seen != atomic_long_read(&iter->rb->wq_counter)); -+ if (ret < 0) -+ break; -+ } -+ -+ return ret; -+} diff --git a/kernel/patches-5.4.x-rt/0024-0007-printk-rb-add-functionality-required-by-printk.patch b/kernel/patches-5.4.x-rt/0024-0007-printk-rb-add-functionality-required-by-printk.patch deleted file mode 100644 index 5d5365b87..000000000 --- a/kernel/patches-5.4.x-rt/0024-0007-printk-rb-add-functionality-required-by-printk.patch +++ /dev/null @@ -1,159 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:45 +0100 -Subject: [PATCH 07/25] printk-rb: add functionality required by printk - -The printk subsystem needs to be able to query the size of the ring -buffer, seek to specific entries within the ring buffer, and track -if records could not be stored in the ring buffer. 
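A sketch of how the seek interface can be used (hypothetical caller): a reader that went away for a while can re-initialize its iterator and attempt to continue from the last record it saw.

....
static void resume_reader(struct prb_iterator *iter,
			  struct printk_ringbuffer *rb, u64 last_seq)
{
	prb_iter_init(iter, rb, NULL);

	/*
	 * -EINVAL: last_seq was already overwritten; in that case
	 * simply start over at the oldest available record.
	 */
	if (prb_iter_seek(iter, last_seq) < 0)
		prb_iter_init(iter, rb, NULL);
}
....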
-
-Signed-off-by: John Ogness
-Signed-off-by: Sebastian Andrzej Siewior
----
- include/linux/printk_ringbuffer.h | 5 ++
- lib/printk_ringbuffer.c | 95 ++++++++++++++++++++++++++++++++++++++
- 2 files changed, 100 insertions(+)
-
---- a/include/linux/printk_ringbuffer.h
-+++ b/include/linux/printk_ringbuffer.h
-@@ -17,6 +17,7 @@ struct printk_ringbuffer {
- unsigned int size_bits;
-
- u64 seq;
-+ atomic_long_t lost;
-
- atomic_long_t tail;
- atomic_long_t head;
-@@ -78,6 +79,7 @@ static struct printk_ringbuffer name = {
- .buffer = &_##name##_buffer[0], \
- .size_bits = szbits, \
- .seq = 0, \
-+ .lost = ATOMIC_LONG_INIT(0), \
- .tail = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
- .head = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
- .reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
-@@ -100,9 +102,12 @@ void prb_iter_copy(struct prb_iterator *
- int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
- int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size,
- u64 *seq);
-+int prb_iter_seek(struct prb_iterator *iter, u64 seq);
- int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
-
- /* utility functions */
-+int prb_buffer_size(struct printk_ringbuffer *rb);
-+void prb_inc_lost(struct printk_ringbuffer *rb);
- void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
- void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
-
---- a/lib/printk_ringbuffer.c
-+++ b/lib/printk_ringbuffer.c
-@@ -175,11 +175,16 @@ void prb_commit(struct prb_handle *h)
- head = PRB_WRAP_LPOS(rb, head, 1);
- continue;
- }
-+ while (atomic_long_read(&rb->lost)) {
-+ atomic_long_dec(&rb->lost);
-+ rb->seq++;
-+ }
- e->seq = ++rb->seq;
- head += e->size;
- changed = true;
- }
- atomic_long_set_release(&rb->head, res);
-+
- atomic_dec(&rb->ctx);
-
- if (atomic_long_read(&rb->reserve) == res)
-@@ -492,3 +497,93 @@ int prb_iter_wait_next(struct prb_iterat
-
- return ret;
- }
-+
-+/*
-+ * prb_iter_seek: Seek forward to a specific record.
-+ * @iter: Iterator to advance.
-+ * @seq: Record number to advance to.
-+ *
-+ * Advance @iter such that a following call to prb_iter_data() will provide
-+ * the contents of the specified record. If a record is specified that does
-+ * not yet exist, advance @iter to the end of the record list.
-+ *
-+ * Note that iterators cannot be rewound. So if a record is requested that
-+ * exists but is previous to @iter in position, @iter is considered invalid.
-+ *
-+ * It is safe to call this function from any context and state.
-+ *
-+ * Returns 1 on success, 0 if the specified record does not yet exist (@iter
-+ * is now at the end of the list), or -EINVAL if @iter is now invalid.
-+ */
-+int prb_iter_seek(struct prb_iterator *iter, u64 seq)
-+{
-+ u64 cur_seq;
-+ int ret;
-+
-+ /* first check if the iterator is already at the wanted seq */
-+ if (seq == 0) {
-+ if (iter->lpos == PRB_INIT)
-+ return 1;
-+ else
-+ return -EINVAL;
-+ }
-+ if (iter->lpos != PRB_INIT) {
-+ if (prb_iter_data(iter, NULL, 0, &cur_seq) >= 0) {
-+ if (cur_seq == seq)
-+ return 1;
-+ if (cur_seq > seq)
-+ return -EINVAL;
-+ }
-+ }
-+
-+ /* iterate to find the wanted seq */
-+ for (;;) {
-+ ret = prb_iter_next(iter, NULL, 0, &cur_seq);
-+ if (ret <= 0)
-+ break;
-+
-+ if (cur_seq == seq)
-+ break;
-+
-+ if (cur_seq > seq) {
-+ ret = -EINVAL;
-+ break;
-+ }
-+ }
-+
-+ return ret;
-+}
-+
-+/*
-+ * prb_buffer_size: Get the size of the ring buffer.
-+ * @rb: The ring buffer to get the size of.
-+ *
-+ * Return the number of bytes used for the ring buffer entry storage area.
-+ * Note that this area stores both entry header and entry data. Therefore
-+ * this represents an upper bound to the amount of data that can be stored
-+ * in the ring buffer.
-+ *
-+ * It is safe to call this function from any context and state.
-+ *
-+ * Returns the size in bytes of the entry storage area.
-+ */
-+int prb_buffer_size(struct printk_ringbuffer *rb)
-+{
-+ return PRB_SIZE(rb);
-+}
-+
-+/*
-+ * prb_inc_lost: Increment the seq counter to signal a lost record.
-+ * @rb: The ring buffer to increment the seq of.
-+ *
-+ * Increment the seq counter so that a seq number is intentionally missing
-+ * for the readers. This allows readers to identify that a record is
-+ * missing. A writer will typically use this function if prb_reserve()
-+ * fails.
-+ *
-+ * It is safe to call this function from any context and state.
-+ */
-+void prb_inc_lost(struct printk_ringbuffer *rb)
-+{
-+ atomic_long_inc(&rb->lost);
-+}
diff --git a/kernel/patches-5.4.x-rt/0025-0008-printk-add-ring-buffer-and-kthread.patch b/kernel/patches-5.4.x-rt/0025-0008-printk-add-ring-buffer-and-kthread.patch
deleted file mode 100644
index 4bd53be65..000000000
--- a/kernel/patches-5.4.x-rt/0025-0008-printk-add-ring-buffer-and-kthread.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From: John Ogness
-Date: Tue, 12 Feb 2019 15:29:46 +0100
-Subject: [PATCH 08/25] printk: add ring buffer and kthread
-
-The printk ring buffer provides an NMI-safe interface for writing
-messages to a ring buffer. Using such a buffer relieves printk
-callers of the current burden of disabled preemption while calling
-the console drivers (and possibly printing out many messages that
-another task put into the log buffer).
-
-Create a ring buffer to be used for storing messages to be
-printed to the consoles.
-
-Create a dedicated printk kthread to block on the ring buffer
-and call the console drivers for the read messages.
-
-NOTE: The printk_delay is relocated to _after_ the message is
- printed, where it makes more sense.
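Stripped of the formatting and console-locking details, the consumer loop this patch introduces (printk_kthread_func() in the hunks below) reduces to the following sketch:

....
for (;;) {
	ret = prb_iter_wait_next(&iter, buf, PRINTK_RECORD_MAX, &seq);
	if (ret == -ERESTARTSYS)
		continue;	/* interrupted; just retry */
	if (ret < 0) {
		/* overtaken by writers: restart at the oldest record */
		prb_iter_init(&iter, &printk_rb, NULL);
		continue;
	}
	/* format buf and hand the record to the console drivers */
}
....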
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 105 insertions(+) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -45,6 +45,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -417,7 +419,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock); - printk_safe_exit_irqrestore(flags); \ - } while (0) - -+DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); -+ - #ifdef CONFIG_PRINTK -+/* record buffer */ -+DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock); -+ - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* the next printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; -@@ -780,6 +787,10 @@ static ssize_t msg_print_ext_body(char * - return p - buf; - } - -+#define PRINTK_SPRINT_MAX (LOG_LINE_MAX + PREFIX_MAX) -+#define PRINTK_RECORD_MAX (sizeof(struct printk_log) + \ -+ CONSOLE_EXT_LOG_MAX + PRINTK_SPRINT_MAX) -+ - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { - u64 seq; -@@ -1620,6 +1631,34 @@ SYSCALL_DEFINE3(syslog, int, type, char - return do_syslog(type, buf, len, SYSLOG_FROM_READER); - } - -+static void format_text(struct printk_log *msg, u64 seq, -+ char *ext_text, size_t *ext_len, -+ char *text, size_t *len, bool time) -+{ -+ if (suppress_message_printing(msg->level)) { -+ /* -+ * Skip record that has level above the console -+ * loglevel and update each console's local seq. -+ */ -+ *len = 0; -+ *ext_len = 0; -+ return; -+ } -+ -+ *len = msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG, -+ time, text, PRINTK_SPRINT_MAX); -+ if (nr_ext_console_drivers) { -+ *ext_len = msg_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, -+ msg, seq); -+ *ext_len += msg_print_ext_body(ext_text + *ext_len, -+ CONSOLE_EXT_LOG_MAX - *ext_len, -+ log_dict(msg), msg->dict_len, -+ log_text(msg), msg->text_len); -+ } else { -+ *ext_len = 0; -+ } -+} -+ - /* - * Special console_lock variants that help to reduce the risk of soft-lockups. - * They allow to pass console_lock to another printk() call using a busy wait. 
-@@ -2974,6 +3013,72 @@ void wake_up_klogd(void) - preempt_enable(); - } - -+static int printk_kthread_func(void *data) -+{ -+ struct prb_iterator iter; -+ struct printk_log *msg; -+ size_t ext_len; -+ char *ext_text; -+ u64 master_seq; -+ size_t len; -+ char *text; -+ char *buf; -+ int ret; -+ -+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); -+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL); -+ buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL); -+ if (!ext_text || !text || !buf) -+ return -1; -+ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ -+ /* the printk kthread never exits */ -+ for (;;) { -+ ret = prb_iter_wait_next(&iter, buf, -+ PRINTK_RECORD_MAX, &master_seq); -+ if (ret == -ERESTARTSYS) { -+ continue; -+ } else if (ret < 0) { -+ /* iterator invalid, start over */ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ continue; -+ } -+ -+ msg = (struct printk_log *)buf; -+ format_text(msg, master_seq, ext_text, &ext_len, text, -+ &len, printk_time); -+ -+ console_lock(); -+ if (len > 0 || ext_len > 0) { -+ call_console_drivers(ext_text, ext_len, text, len); -+ boot_delay_msec(msg->level); -+ printk_delay(); -+ } -+ console_unlock(); -+ } -+ -+ kfree(ext_text); -+ kfree(text); -+ kfree(buf); -+ -+ return 0; -+} -+ -+static int __init init_printk_kthread(void) -+{ -+ struct task_struct *thread; -+ -+ thread = kthread_run(printk_kthread_func, NULL, "printk"); -+ if (IS_ERR(thread)) { -+ pr_err("printk: unable to create printing thread\n"); -+ return PTR_ERR(thread); -+ } -+ -+ return 0; -+} -+late_initcall(init_printk_kthread); -+ - void defer_console_output(void) - { - preempt_disable(); diff --git a/kernel/patches-5.4.x-rt/0026-0009-printk-remove-exclusive-console-hack.patch b/kernel/patches-5.4.x-rt/0026-0009-printk-remove-exclusive-console-hack.patch deleted file mode 100644 index 7369eb3e8..000000000 --- a/kernel/patches-5.4.x-rt/0026-0009-printk-remove-exclusive-console-hack.patch +++ /dev/null @@ -1,101 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:47 +0100 -Subject: [PATCH 09/25] printk: remove exclusive console hack - -In order to support printing the printk log history when new -consoles are registered, a global exclusive_console variable is -temporarily set. This only works because printk runs with -preemption disabled. - -When console printing is moved to a fully preemptible dedicated -kthread, this hack no longer works. - -Remove exclusive_console usage. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 30 ++++-------------------------- - 1 file changed, 4 insertions(+), 26 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -269,11 +269,6 @@ static void __up_console_sem(unsigned lo - static int console_locked, console_suspended; - - /* -- * If exclusive_console is non-NULL then only this console is to be printed to. 
-- */ --static struct console *exclusive_console; -- --/* - * Array of consoles built from command line options (console=) - */ - -@@ -443,7 +438,6 @@ static u32 log_next_idx; - /* the next printk record to write to the console */ - static u64 console_seq; - static u32 console_idx; --static u64 exclusive_console_stop_seq; - - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; -@@ -1815,8 +1809,6 @@ static void call_console_drivers(const c - return; - - for_each_console(con) { -- if (exclusive_console && con != exclusive_console) -- continue; - if (!(con->flags & CON_ENABLED)) - continue; - if (!con->write) -@@ -2109,7 +2101,6 @@ static u64 syslog_seq; - static u32 syslog_idx; - static u64 console_seq; - static u32 console_idx; --static u64 exclusive_console_stop_seq; - static u64 log_first_seq; - static u32 log_first_idx; - static u64 log_next_seq; -@@ -2478,12 +2469,6 @@ void console_unlock(void) - goto skip; - } - -- /* Output to all consoles once old messages replayed. */ -- if (unlikely(exclusive_console && -- console_seq >= exclusive_console_stop_seq)) { -- exclusive_console = NULL; -- } -- - len += msg_print_text(msg, - console_msg_format & MSG_FORMAT_SYSLOG, - printk_time, text + len, sizeof(text) - len); -@@ -2809,17 +2794,6 @@ void register_console(struct console *ne - * for us. - */ - logbuf_lock_irqsave(flags); -- /* -- * We're about to replay the log buffer. Only do this to the -- * just-registered console to avoid excessive message spam to -- * the already-registered consoles. -- * -- * Set exclusive_console with disabled interrupts to reduce -- * race window with eventual console_flush_on_panic() that -- * ignores console_lock. -- */ -- exclusive_console = newcon; -- exclusive_console_stop_seq = console_seq; - console_seq = syslog_seq; - console_idx = syslog_idx; - logbuf_unlock_irqrestore(flags); -@@ -2833,6 +2807,10 @@ void register_console(struct console *ne - * boot consoles, real consoles, etc - this is to ensure that end - * users know there might be something in the kernel's log buffer that - * went to the bootconsole (that they do not see on the real console) -+ * -+ * This message is also important because it will trigger the -+ * printk kthread to begin dumping the log buffer to the newly -+ * registered console. - */ - pr_info("%sconsole [%s%d] enabled\n", - (newcon->flags & CON_BOOT) ? "boot" : "" , diff --git a/kernel/patches-5.4.x-rt/0027-0010-printk-redirect-emit-store-to-new-ringbuffer.patch b/kernel/patches-5.4.x-rt/0027-0010-printk-redirect-emit-store-to-new-ringbuffer.patch deleted file mode 100644 index 0355541c4..000000000 --- a/kernel/patches-5.4.x-rt/0027-0010-printk-redirect-emit-store-to-new-ringbuffer.patch +++ /dev/null @@ -1,437 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:48 +0100 -Subject: [PATCH 10/25] printk: redirect emit/store to new ringbuffer - -vprintk_emit and vprintk_store are the main functions that all printk -variants eventually go through. Change these to store the message in -the new printk ring buffer that the printk kthread is reading. - -Remove functions no longer in use because of the changes to -vprintk_emit and vprintk_store. - -In order to handle interrupts and NMIs, a second per-cpu ring buffer -(sprint_rb) is added. This ring buffer is used for NMI-safe memory -allocation in order to format the printk messages. - -NOTE: LOG_CONT is ignored for now and handled as individual messages. 
- LOG_CONT functions are masked behind "#if 0" blocks until their - functionality can be restored - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 325 +++++++------------------------------------------ - 1 file changed, 51 insertions(+), 274 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -517,90 +517,6 @@ static u32 log_next(u32 idx) - return idx + msg->len; - } - --/* -- * Check whether there is enough free space for the given message. -- * -- * The same values of first_idx and next_idx mean that the buffer -- * is either empty or full. -- * -- * If the buffer is empty, we must respect the position of the indexes. -- * They cannot be reset to the beginning of the buffer. -- */ --static int logbuf_has_space(u32 msg_size, bool empty) --{ -- u32 free; -- -- if (log_next_idx > log_first_idx || empty) -- free = max(log_buf_len - log_next_idx, log_first_idx); -- else -- free = log_first_idx - log_next_idx; -- -- /* -- * We need space also for an empty header that signalizes wrapping -- * of the buffer. -- */ -- return free >= msg_size + sizeof(struct printk_log); --} -- --static int log_make_free_space(u32 msg_size) --{ -- while (log_first_seq < log_next_seq && -- !logbuf_has_space(msg_size, false)) { -- /* drop old messages until we have enough contiguous space */ -- log_first_idx = log_next(log_first_idx); -- log_first_seq++; -- } -- -- if (clear_seq < log_first_seq) { -- clear_seq = log_first_seq; -- clear_idx = log_first_idx; -- } -- -- /* sequence numbers are equal, so the log buffer is empty */ -- if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) -- return 0; -- -- return -ENOMEM; --} -- --/* compute the message size including the padding bytes */ --static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) --{ -- u32 size; -- -- size = sizeof(struct printk_log) + text_len + dict_len; -- *pad_len = (-size) & (LOG_ALIGN - 1); -- size += *pad_len; -- -- return size; --} -- --/* -- * Define how much of the log buffer we could take at maximum. The value -- * must be greater than two. Note that only half of the buffer is available -- * when the index points to the middle. -- */ --#define MAX_LOG_TAKE_PART 4 --static const char trunc_msg[] = ""; -- --static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, -- u16 *dict_len, u32 *pad_len) --{ -- /* -- * The message should not take the whole buffer. Otherwise, it might -- * get removed too soon. 
-- */ -- u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; -- if (*text_len > max_text_len) -- *text_len = max_text_len; -- /* enable the warning message */ -- *trunc_msg_len = strlen(trunc_msg); -- /* disable the "dict" completely */ -- *dict_len = 0; -- /* compute the size again, count also the warning message */ -- return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); --} -- - /* insert record into the buffer, discard old ones, update heads */ - static int log_store(u32 caller_id, int facility, int level, - enum log_flags flags, u64 ts_nsec, -@@ -608,57 +524,39 @@ static int log_store(u32 caller_id, int - const char *text, u16 text_len) - { - struct printk_log *msg; -- u32 size, pad_len; -- u16 trunc_msg_len = 0; -- -- /* number of '\0' padding bytes to next message */ -- size = msg_used_size(text_len, dict_len, &pad_len); -+ struct prb_handle h; -+ char *rbuf; -+ u32 size; - -- if (log_make_free_space(size)) { -- /* truncate the message if it is too long for empty buffer */ -- size = truncate_msg(&text_len, &trunc_msg_len, -- &dict_len, &pad_len); -- /* survive when the log buffer is too small for trunc_msg */ -- if (log_make_free_space(size)) -- return 0; -- } -+ size = sizeof(*msg) + text_len + dict_len; - -- if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { -+ rbuf = prb_reserve(&h, &printk_rb, size); -+ if (!rbuf) { - /* -- * This message + an additional empty header does not fit -- * at the end of the buffer. Add an empty header with len == 0 -- * to signify a wrap around. -+ * An emergency message would have been printed, but -+ * it cannot be stored in the log. - */ -- memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); -- log_next_idx = 0; -+ prb_inc_lost(&printk_rb); -+ return 0; - } - - /* fill message */ -- msg = (struct printk_log *)(log_buf + log_next_idx); -+ msg = (struct printk_log *)rbuf; - memcpy(log_text(msg), text, text_len); - msg->text_len = text_len; -- if (trunc_msg_len) { -- memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); -- msg->text_len += trunc_msg_len; -- } - memcpy(log_dict(msg), dict, dict_len); - msg->dict_len = dict_len; - msg->facility = facility; - msg->level = level & 7; - msg->flags = flags & 0x1f; -- if (ts_nsec > 0) -- msg->ts_nsec = ts_nsec; -- else -- msg->ts_nsec = local_clock(); -+ msg->ts_nsec = ts_nsec; - #ifdef CONFIG_PRINTK_CALLER - msg->caller_id = caller_id; - #endif -- memset(log_dict(msg) + dict_len, 0, pad_len); - msg->len = size; - - /* insert message */ -- log_next_idx += msg->len; -- log_next_seq++; -+ prb_commit(&h); - - return msg->text_len; - } -@@ -1729,70 +1627,6 @@ static int console_lock_spinning_disable - return 1; - } - --/** -- * console_trylock_spinning - try to get console_lock by busy waiting -- * -- * This allows to busy wait for the console_lock when the current -- * owner is running in specially marked sections. It means that -- * the current owner is running and cannot reschedule until it -- * is ready to lose the lock. 
-- * -- * Return: 1 if we got the lock, 0 othrewise -- */ --static int console_trylock_spinning(void) --{ -- struct task_struct *owner = NULL; -- bool waiter; -- bool spin = false; -- unsigned long flags; -- -- if (console_trylock()) -- return 1; -- -- printk_safe_enter_irqsave(flags); -- -- raw_spin_lock(&console_owner_lock); -- owner = READ_ONCE(console_owner); -- waiter = READ_ONCE(console_waiter); -- if (!waiter && owner && owner != current) { -- WRITE_ONCE(console_waiter, true); -- spin = true; -- } -- raw_spin_unlock(&console_owner_lock); -- -- /* -- * If there is an active printk() writing to the -- * consoles, instead of having it write our data too, -- * see if we can offload that load from the active -- * printer, and do some printing ourselves. -- * Go into a spin only if there isn't already a waiter -- * spinning, and there is an active printer, and -- * that active printer isn't us (recursive printk?). -- */ -- if (!spin) { -- printk_safe_exit_irqrestore(flags); -- return 0; -- } -- -- /* We spin waiting for the owner to release us */ -- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); -- /* Owner will clear console_waiter on hand off */ -- while (READ_ONCE(console_waiter)) -- cpu_relax(); -- spin_release(&console_owner_dep_map, 1, _THIS_IP_); -- -- printk_safe_exit_irqrestore(flags); -- /* -- * The owner passed the console lock to us. -- * Since we did not spin on console lock, annotate -- * this as a trylock. Otherwise lockdep will -- * complain. -- */ -- mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_); -- -- return 1; --} -- - /* - * Call the console drivers, asking them to write out - * log_buf[start] to log_buf[end - 1]. -@@ -1813,7 +1647,7 @@ static void call_console_drivers(const c - continue; - if (!con->write) - continue; -- if (!cpu_online(smp_processor_id()) && -+ if (!cpu_online(raw_smp_processor_id()) && - !(con->flags & CON_ANYTIME)) - continue; - if (con->flags & CON_EXTENDED) -@@ -1843,6 +1677,8 @@ static inline u32 printk_caller_id(void) - 0x80000000 + raw_smp_processor_id(); - } - -+/* FIXME: no support for LOG_CONT */ -+#if 0 - /* - * Continuation lines are buffered, and not committed to the record buffer - * until the line is complete, or a race forces it. The line fragments -@@ -1898,56 +1734,45 @@ static bool cont_add(u32 caller_id, int - - return true; - } -+#endif /* 0 */ - --static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) --{ -- const u32 caller_id = printk_caller_id(); -- -- /* -- * If an earlier line was buffered, and we're a continuation -- * write from the same context, try to add it to the buffer. -- */ -- if (cont.len) { -- if (cont.caller_id == caller_id && (lflags & LOG_CONT)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -- return text_len; -- } -- /* Otherwise, make sure it's flushed */ -- cont_flush(); -- } -- -- /* Skip empty continuation lines that couldn't be added - they just flush */ -- if (!text_len && (lflags & LOG_CONT)) -- return 0; -- -- /* If it doesn't end in a newline, try to buffer the current line */ -- if (!(lflags & LOG_NEWLINE)) { -- if (cont_add(caller_id, facility, level, lflags, text, text_len)) -- return text_len; -- } -- -- /* Store it in the record log */ -- return log_store(caller_id, facility, level, lflags, 0, -- dict, dictlen, text, text_len); --} -- --/* Must be called under logbuf_lock. 
*/ - int vprintk_store(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, va_list args) - { -- static char textbuf[LOG_LINE_MAX]; -- char *text = textbuf; -- size_t text_len; -+ return vprintk_emit(facility, level, dict, dictlen, fmt, args); -+} -+ -+/* ring buffer used as memory allocator for temporary sprint buffers */ -+DECLARE_STATIC_PRINTKRB(sprint_rb, -+ ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) + -+ sizeof(long)) + 2, &printk_cpulock); -+ -+asmlinkage int vprintk_emit(int facility, int level, -+ const char *dict, size_t dictlen, -+ const char *fmt, va_list args) -+{ -+ const u32 caller_id = printk_caller_id(); - enum log_flags lflags = 0; -+ int printed_len = 0; -+ struct prb_handle h; -+ size_t text_len; -+ u64 ts_nsec; -+ char *text; -+ char *rbuf; - -- /* -- * The printf needs to come first; we need the syslog -- * prefix which might be passed-in as a parameter. -- */ -- text_len = vscnprintf(text, sizeof(textbuf), fmt, args); -+ ts_nsec = local_clock(); -+ -+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_SPRINT_MAX); -+ if (!rbuf) { -+ prb_inc_lost(&printk_rb); -+ return printed_len; -+ } -+ -+ text = rbuf; -+ text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args); - -- /* mark and strip a trailing newline */ -+ /* strip and flag a trailing newline */ - if (text_len && text[text_len-1] == '\n') { - text_len--; - lflags |= LOG_NEWLINE; -@@ -1978,58 +1803,10 @@ int vprintk_store(int facility, int leve - if (dict) - lflags |= LOG_NEWLINE; - -- return log_output(facility, level, lflags, -- dict, dictlen, text, text_len); --} -- --asmlinkage int vprintk_emit(int facility, int level, -- const char *dict, size_t dictlen, -- const char *fmt, va_list args) --{ -- int printed_len; -- bool in_sched = false, pending_output; -- unsigned long flags; -- u64 curr_log_seq; -- -- /* Suppress unimportant messages after panic happens */ -- if (unlikely(suppress_printk)) -- return 0; -- -- if (level == LOGLEVEL_SCHED) { -- level = LOGLEVEL_DEFAULT; -- in_sched = true; -- } -- -- boot_delay_msec(level); -- printk_delay(); -- -- /* This stops the holder of console_sem just where we want him */ -- logbuf_lock_irqsave(flags); -- curr_log_seq = log_next_seq; -- printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); -- pending_output = (curr_log_seq != log_next_seq); -- logbuf_unlock_irqrestore(flags); -- -- /* If called from the scheduler, we can not call up(). */ -- if (!in_sched && pending_output) { -- /* -- * Disable preemption to avoid being preempted while holding -- * console_sem which would prevent anyone from printing to -- * console -- */ -- preempt_disable(); -- /* -- * Try to acquire and then immediately release the console -- * semaphore. The release will print out buffers and wake up -- * /dev/kmsg and syslog() users. 
-- */ -- if (console_trylock_spinning()) -- console_unlock(); -- preempt_enable(); -- } -+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, -+ dict, dictlen, text, text_len); - -- if (pending_output) -- wake_up_klogd(); -+ prb_commit(&h); - return printed_len; - } - EXPORT_SYMBOL(vprintk_emit); -@@ -2494,7 +2271,7 @@ void console_unlock(void) - console_lock_spinning_enable(); - - stop_critical_timings(); /* don't trace print latency */ -- call_console_drivers(ext_text, ext_len, text, len); -+ //call_console_drivers(ext_text, ext_len, text, len); - start_critical_timings(); - - if (console_lock_spinning_disable_and_check()) { diff --git a/kernel/patches-5.4.x-rt/0028-0011-printk_safe-remove-printk-safe-code.patch b/kernel/patches-5.4.x-rt/0028-0011-printk_safe-remove-printk-safe-code.patch deleted file mode 100644 index aa8be9a69..000000000 --- a/kernel/patches-5.4.x-rt/0028-0011-printk_safe-remove-printk-safe-code.patch +++ /dev/null @@ -1,699 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:49 +0100 -Subject: [PATCH 11/25] printk_safe: remove printk safe code - -vprintk variants are now NMI-safe so there is no longer a need for -the "safe" calls. - -NOTE: This also removes printk flushing functionality. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/powerpc/kernel/traps.c | 1 - arch/powerpc/kernel/watchdog.c | 5 - include/linux/hardirq.h | 2 - include/linux/printk.h | 27 -- - init/main.c | 1 - kernel/kexec_core.c | 1 - kernel/panic.c | 3 - kernel/printk/Makefile | 1 - kernel/printk/internal.h | 30 -- - kernel/printk/printk.c | 13 - - kernel/printk/printk_safe.c | 415 ----------------------------------------- - kernel/trace/trace.c | 2 - lib/nmi_backtrace.c | 6 - 13 files changed, 7 insertions(+), 500 deletions(-) - delete mode 100644 kernel/printk/printk_safe.c - ---- a/arch/powerpc/kernel/traps.c -+++ b/arch/powerpc/kernel/traps.c -@@ -171,7 +171,6 @@ extern void panic_flush_kmsg_start(void) - - extern void panic_flush_kmsg_end(void) - { -- printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - bust_spinlocks(0); - debug_locks_off(); ---- a/arch/powerpc/kernel/watchdog.c -+++ b/arch/powerpc/kernel/watchdog.c -@@ -181,11 +181,6 @@ static void watchdog_smp_panic(int cpu, - - wd_smp_unlock(&flags); - -- printk_safe_flush(); -- /* -- * printk_safe_flush() seems to require another print -- * before anything actually goes out to console. -- */ - if (sysctl_hardlockup_all_cpu_backtrace) - trigger_allbutself_cpu_backtrace(); - ---- a/include/linux/hardirq.h -+++ b/include/linux/hardirq.h -@@ -68,7 +68,6 @@ extern void irq_exit(void); - #define nmi_enter() \ - do { \ - arch_nmi_enter(); \ -- printk_nmi_enter(); \ - lockdep_off(); \ - ftrace_nmi_enter(); \ - BUG_ON(in_nmi()); \ -@@ -85,7 +84,6 @@ extern void irq_exit(void); - preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ - ftrace_nmi_exit(); \ - lockdep_on(); \ -- printk_nmi_exit(); \ - arch_nmi_exit(); \ - } while (0) - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -146,18 +146,6 @@ static inline __printf(1, 2) __cold - void early_printk(const char *s, ...) 
{ } - #endif - --#ifdef CONFIG_PRINTK_NMI --extern void printk_nmi_enter(void); --extern void printk_nmi_exit(void); --extern void printk_nmi_direct_enter(void); --extern void printk_nmi_direct_exit(void); --#else --static inline void printk_nmi_enter(void) { } --static inline void printk_nmi_exit(void) { } --static inline void printk_nmi_direct_enter(void) { } --static inline void printk_nmi_direct_exit(void) { } --#endif /* PRINTK_NMI */ -- - #ifdef CONFIG_PRINTK - asmlinkage __printf(5, 0) - int vprintk_emit(int facility, int level, -@@ -202,9 +190,6 @@ void __init setup_log_buf(int early); - void dump_stack_print_info(const char *log_lvl); - void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack(void) __cold; --extern void printk_safe_init(void); --extern void printk_safe_flush(void); --extern void printk_safe_flush_on_panic(void); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) -@@ -268,18 +253,6 @@ static inline void show_regs_print_info( - static inline void dump_stack(void) - { - } -- --static inline void printk_safe_init(void) --{ --} -- --static inline void printk_safe_flush(void) --{ --} -- --static inline void printk_safe_flush_on_panic(void) --{ --} - #endif - - extern int kptr_restrict; ---- a/init/main.c -+++ b/init/main.c -@@ -694,7 +694,6 @@ asmlinkage __visible void __init start_k - boot_init_stack_canary(); - - time_init(); -- printk_safe_init(); - perf_event_init(); - profile_init(); - call_function_init(); ---- a/kernel/kexec_core.c -+++ b/kernel/kexec_core.c -@@ -972,7 +972,6 @@ void crash_kexec(struct pt_regs *regs) - old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); - if (old_cpu == PANIC_CPU_INVALID) { - /* This is the 1st CPU which comes here, so go ahead. */ -- printk_safe_flush_on_panic(); - __crash_kexec(regs); - - /* ---- a/kernel/panic.c -+++ b/kernel/panic.c -@@ -237,7 +237,6 @@ void panic(const char *fmt, ...) - * Bypass the panic_cpu check and call __crash_kexec directly. - */ - if (!_crash_kexec_post_notifiers) { -- printk_safe_flush_on_panic(); - __crash_kexec(NULL); - - /* -@@ -261,8 +260,6 @@ void panic(const char *fmt, ...) - */ - atomic_notifier_call_chain(&panic_notifier_list, 0, buf); - -- /* Call flush even twice. 
It tries harder with a single online CPU */ -- printk_safe_flush_on_panic(); - kmsg_dump(KMSG_DUMP_PANIC); - - /* ---- a/kernel/printk/Makefile -+++ b/kernel/printk/Makefile -@@ -1,4 +1,3 @@ - # SPDX-License-Identifier: GPL-2.0-only - obj-y = printk.o --obj-$(CONFIG_PRINTK) += printk_safe.o - obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o ---- a/kernel/printk/internal.h -+++ b/kernel/printk/internal.h -@@ -20,32 +20,6 @@ int vprintk_store(int facility, int leve - __printf(1, 0) int vprintk_default(const char *fmt, va_list args); - __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); - __printf(1, 0) int vprintk_func(const char *fmt, va_list args); --void __printk_safe_enter(void); --void __printk_safe_exit(void); -- --#define printk_safe_enter_irqsave(flags) \ -- do { \ -- local_irq_save(flags); \ -- __printk_safe_enter(); \ -- } while (0) -- --#define printk_safe_exit_irqrestore(flags) \ -- do { \ -- __printk_safe_exit(); \ -- local_irq_restore(flags); \ -- } while (0) -- --#define printk_safe_enter_irq() \ -- do { \ -- local_irq_disable(); \ -- __printk_safe_enter(); \ -- } while (0) -- --#define printk_safe_exit_irq() \ -- do { \ -- __printk_safe_exit(); \ -- local_irq_enable(); \ -- } while (0) - - void defer_console_output(void); - -@@ -58,10 +32,10 @@ void defer_console_output(void); - * semaphore and some of console functions (console_unlock()/etc.), so - * printk-safe must preserve the existing local IRQ guarantees. - */ -+#endif /* CONFIG_PRINTK */ -+ - #define printk_safe_enter_irqsave(flags) local_irq_save(flags) - #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) - - #define printk_safe_enter_irq() local_irq_disable() - #define printk_safe_exit_irq() local_irq_enable() -- --#endif /* CONFIG_PRINTK */ ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1736,13 +1736,6 @@ static bool cont_add(u32 caller_id, int - } - #endif /* 0 */ - --int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, -- const char *fmt, va_list args) --{ -- return vprintk_emit(facility, level, dict, dictlen, fmt, args); --} -- - /* ring buffer used as memory allocator for temporary sprint buffers */ - DECLARE_STATIC_PRINTKRB(sprint_rb, - ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) + -@@ -1811,6 +1804,11 @@ asmlinkage int vprintk_emit(int facility - } - EXPORT_SYMBOL(vprintk_emit); - -+__printf(1, 0) int vprintk_func(const char *fmt, va_list args) -+{ -+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); -+} -+ - asmlinkage int vprintk(const char *fmt, va_list args) - { - return vprintk_func(fmt, args); -@@ -3211,5 +3209,4 @@ void kmsg_dump_rewind(struct kmsg_dumper - logbuf_unlock_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); -- - #endif ---- a/kernel/printk/printk_safe.c -+++ /dev/null -@@ -1,415 +0,0 @@ --// SPDX-License-Identifier: GPL-2.0-or-later --/* -- * printk_safe.c - Safe printk for printk-deadlock-prone contexts -- */ -- --#include --#include --#include --#include --#include --#include --#include -- --#include "internal.h" -- --/* -- * printk() could not take logbuf_lock in NMI context. Instead, -- * it uses an alternative implementation that temporary stores -- * the strings into a per-CPU buffer. The content of the buffer -- * is later flushed into the main ring buffer via IRQ work. -- * -- * The alternative implementation is chosen transparently -- * by examinig current printk() context mask stored in @printk_context -- * per-CPU variable. 
-- * -- * The implementation allows to flush the strings also from another CPU. -- * There are situations when we want to make sure that all buffers -- * were handled or when IRQs are blocked. -- */ --static int printk_safe_irq_ready __read_mostly; -- --#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ -- sizeof(atomic_t) - \ -- sizeof(atomic_t) - \ -- sizeof(struct irq_work)) -- --struct printk_safe_seq_buf { -- atomic_t len; /* length of written data */ -- atomic_t message_lost; -- struct irq_work work; /* IRQ work that flushes the buffer */ -- unsigned char buffer[SAFE_LOG_BUF_LEN]; --}; -- --static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); --static DEFINE_PER_CPU(int, printk_context); -- --#ifdef CONFIG_PRINTK_NMI --static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); --#endif -- --/* Get flushed in a more safe context. */ --static void queue_flush_work(struct printk_safe_seq_buf *s) --{ -- if (printk_safe_irq_ready) -- irq_work_queue(&s->work); --} -- --/* -- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe -- * have dedicated buffers, because otherwise printk-safe preempted by -- * NMI-printk would have overwritten the NMI messages. -- * -- * The messages are flushed from irq work (or from panic()), possibly, -- * from other CPU, concurrently with printk_safe_log_store(). Should this -- * happen, printk_safe_log_store() will notice the buffer->len mismatch -- * and repeat the write. -- */ --static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s, -- const char *fmt, va_list args) --{ -- int add; -- size_t len; -- va_list ap; -- --again: -- len = atomic_read(&s->len); -- -- /* The trailing '\0' is not counted into len. */ -- if (len >= sizeof(s->buffer) - 1) { -- atomic_inc(&s->message_lost); -- queue_flush_work(s); -- return 0; -- } -- -- /* -- * Make sure that all old data have been read before the buffer -- * was reset. This is not needed when we just append data. -- */ -- if (!len) -- smp_rmb(); -- -- va_copy(ap, args); -- add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap); -- va_end(ap); -- if (!add) -- return 0; -- -- /* -- * Do it once again if the buffer has been flushed in the meantime. -- * Note that atomic_cmpxchg() is an implicit memory barrier that -- * makes sure that the data were written before updating s->len. -- */ -- if (atomic_cmpxchg(&s->len, len, len + add) != len) -- goto again; -- -- queue_flush_work(s); -- return add; --} -- --static inline void printk_safe_flush_line(const char *text, int len) --{ -- /* -- * Avoid any console drivers calls from here, because we may be -- * in NMI or printk_safe context (when in panic). The messages -- * must go only into the ring buffer at this stage. Consoles will -- * get explicitly called later when a crashdump is not generated. -- */ -- printk_deferred("%.*s", len, text); --} -- --/* printk part of the temporary buffer line by line */ --static int printk_safe_flush_buffer(const char *start, size_t len) --{ -- const char *c, *end; -- bool header; -- -- c = start; -- end = start + len; -- header = true; -- -- /* Print line by line. */ -- while (c < end) { -- if (*c == '\n') { -- printk_safe_flush_line(start, c - start + 1); -- start = ++c; -- header = true; -- continue; -- } -- -- /* Handle continuous lines or missing new line. 
*/ -- if ((c + 1 < end) && printk_get_level(c)) { -- if (header) { -- c = printk_skip_level(c); -- continue; -- } -- -- printk_safe_flush_line(start, c - start); -- start = c++; -- header = true; -- continue; -- } -- -- header = false; -- c++; -- } -- -- /* Check if there was a partial line. Ignore pure header. */ -- if (start < end && !header) { -- static const char newline[] = KERN_CONT "\n"; -- -- printk_safe_flush_line(start, end - start); -- printk_safe_flush_line(newline, strlen(newline)); -- } -- -- return len; --} -- --static void report_message_lost(struct printk_safe_seq_buf *s) --{ -- int lost = atomic_xchg(&s->message_lost, 0); -- -- if (lost) -- printk_deferred("Lost %d message(s)!\n", lost); --} -- --/* -- * Flush data from the associated per-CPU buffer. The function -- * can be called either via IRQ work or independently. -- */ --static void __printk_safe_flush(struct irq_work *work) --{ -- static raw_spinlock_t read_lock = -- __RAW_SPIN_LOCK_INITIALIZER(read_lock); -- struct printk_safe_seq_buf *s = -- container_of(work, struct printk_safe_seq_buf, work); -- unsigned long flags; -- size_t len; -- int i; -- -- /* -- * The lock has two functions. First, one reader has to flush all -- * available message to make the lockless synchronization with -- * writers easier. Second, we do not want to mix messages from -- * different CPUs. This is especially important when printing -- * a backtrace. -- */ -- raw_spin_lock_irqsave(&read_lock, flags); -- -- i = 0; --more: -- len = atomic_read(&s->len); -- -- /* -- * This is just a paranoid check that nobody has manipulated -- * the buffer an unexpected way. If we printed something then -- * @len must only increase. Also it should never overflow the -- * buffer size. -- */ -- if ((i && i >= len) || len > sizeof(s->buffer)) { -- const char *msg = "printk_safe_flush: internal error\n"; -- -- printk_safe_flush_line(msg, strlen(msg)); -- len = 0; -- } -- -- if (!len) -- goto out; /* Someone else has already flushed the buffer. */ -- -- /* Make sure that data has been written up to the @len */ -- smp_rmb(); -- i += printk_safe_flush_buffer(s->buffer + i, len - i); -- -- /* -- * Check that nothing has got added in the meantime and truncate -- * the buffer. Note that atomic_cmpxchg() is an implicit memory -- * barrier that makes sure that the data were copied before -- * updating s->len. -- */ -- if (atomic_cmpxchg(&s->len, len, 0) != len) -- goto more; -- --out: -- report_message_lost(s); -- raw_spin_unlock_irqrestore(&read_lock, flags); --} -- --/** -- * printk_safe_flush - flush all per-cpu nmi buffers. -- * -- * The buffers are flushed automatically via IRQ work. This function -- * is useful only when someone wants to be sure that all buffers have -- * been flushed at some point. -- */ --void printk_safe_flush(void) --{ -- int cpu; -- -- for_each_possible_cpu(cpu) { --#ifdef CONFIG_PRINTK_NMI -- __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); --#endif -- __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); -- } --} -- --/** -- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system -- * goes down. -- * -- * Similar to printk_safe_flush() but it can be called even in NMI context when -- * the system goes down. It does the best effort to get NMI messages into -- * the main ring buffer. -- * -- * Note that it could try harder when there is only one CPU online. -- */ --void printk_safe_flush_on_panic(void) --{ -- /* -- * Make sure that we could access the main ring buffer. 
-- * Do not risk a double release when more CPUs are up. -- */ -- if (raw_spin_is_locked(&logbuf_lock)) { -- if (num_online_cpus() > 1) -- return; -- -- debug_locks_off(); -- raw_spin_lock_init(&logbuf_lock); -- } -- -- printk_safe_flush(); --} -- --#ifdef CONFIG_PRINTK_NMI --/* -- * Safe printk() for NMI context. It uses a per-CPU buffer to -- * store the message. NMIs are not nested, so there is always only -- * one writer running. But the buffer might get flushed from another -- * CPU, so we need to be careful. -- */ --static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) --{ -- struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); -- -- return printk_safe_log_store(s, fmt, args); --} -- --void notrace printk_nmi_enter(void) --{ -- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); --} -- --void notrace printk_nmi_exit(void) --{ -- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); --} -- --/* -- * Marks a code that might produce many messages in NMI context -- * and the risk of losing them is more critical than eventual -- * reordering. -- * -- * It has effect only when called in NMI context. Then printk() -- * will try to store the messages into the main logbuf directly -- * and use the per-CPU buffers only as a fallback when the lock -- * is not available. -- */ --void printk_nmi_direct_enter(void) --{ -- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) -- this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); --} -- --void printk_nmi_direct_exit(void) --{ -- this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); --} -- --#else -- --static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) --{ -- return 0; --} -- --#endif /* CONFIG_PRINTK_NMI */ -- --/* -- * Lock-less printk(), to avoid deadlocks should the printk() recurse -- * into itself. It uses a per-CPU buffer to store the message, just like -- * NMI. -- */ --static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args) --{ -- struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); -- -- return printk_safe_log_store(s, fmt, args); --} -- --/* Can be preempted by NMI. */ --void __printk_safe_enter(void) --{ -- this_cpu_inc(printk_context); --} -- --/* Can be preempted by NMI. */ --void __printk_safe_exit(void) --{ -- this_cpu_dec(printk_context); --} -- --__printf(1, 0) int vprintk_func(const char *fmt, va_list args) --{ -- /* -- * Try to use the main logbuf even in NMI. But avoid calling console -- * drivers that might have their own locks. -- */ -- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && -- raw_spin_trylock(&logbuf_lock)) { -- int len; -- -- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); -- raw_spin_unlock(&logbuf_lock); -- defer_console_output(); -- return len; -- } -- -- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ -- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) -- return vprintk_nmi(fmt, args); -- -- /* Use extra buffer to prevent a recursion deadlock in safe mode. */ -- if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) -- return vprintk_safe(fmt, args); -- -- /* No obstacles. 
*/ -- return vprintk_default(fmt, args); --} -- --void __init printk_safe_init(void) --{ -- int cpu; -- -- for_each_possible_cpu(cpu) { -- struct printk_safe_seq_buf *s; -- -- s = &per_cpu(safe_print_seq, cpu); -- init_irq_work(&s->work, __printk_safe_flush); -- --#ifdef CONFIG_PRINTK_NMI -- s = &per_cpu(nmi_print_seq, cpu); -- init_irq_work(&s->work, __printk_safe_flush); --#endif -- } -- -- /* -- * In the highly unlikely event that a NMI were to trigger at -- * this moment. Make sure IRQ work is set up before this -- * variable is set. -- */ -- barrier(); -- printk_safe_irq_ready = 1; -- -- /* Flush pending messages that did not have scheduled IRQ works. */ -- printk_safe_flush(); --} ---- a/kernel/trace/trace.c -+++ b/kernel/trace/trace.c -@@ -8929,7 +8929,6 @@ void ftrace_dump(enum ftrace_dump_mode o - tracing_off(); - - local_irq_save(flags); -- printk_nmi_direct_enter(); - - /* Simulate the iterator */ - trace_init_global_iter(&iter); -@@ -9006,7 +9005,6 @@ void ftrace_dump(enum ftrace_dump_mode o - atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); - } - atomic_dec(&dump_running); -- printk_nmi_direct_exit(); - local_irq_restore(flags); - } - EXPORT_SYMBOL_GPL(ftrace_dump); ---- a/lib/nmi_backtrace.c -+++ b/lib/nmi_backtrace.c -@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const - touch_softlockup_watchdog(); - } - -- /* -- * Force flush any remote buffers that might be stuck in IRQ context -- * and therefore could not run their irq_work. -- */ -- printk_safe_flush(); -- - clear_bit_unlock(0, &backtrace_flag); - put_cpu(); - } diff --git a/kernel/patches-5.4.x-rt/0029-0012-printk-minimize-console-locking-implementation.patch b/kernel/patches-5.4.x-rt/0029-0012-printk-minimize-console-locking-implementation.patch deleted file mode 100644 index c509bd4e5..000000000 --- a/kernel/patches-5.4.x-rt/0029-0012-printk-minimize-console-locking-implementation.patch +++ /dev/null @@ -1,329 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:50 +0100 -Subject: [PATCH 12/25] printk: minimize console locking implementation - -Since printing of the printk buffer is now handled by the printk -kthread, minimize the console locking functions to just handle -locking of the console. - -NOTE: With this console_flush_on_panic will no longer flush. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 255 ------------------------------------------------- - 1 file changed, 1 insertion(+), 254 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -227,19 +227,7 @@ static int nr_ext_console_drivers; - - static int __down_trylock_console_sem(unsigned long ip) - { -- int lock_failed; -- unsigned long flags; -- -- /* -- * Here and in __up_console_sem() we need to be in safe mode, -- * because spindump/WARN/etc from under console ->lock will -- * deadlock in printk()->down_trylock_console_sem() otherwise. 
-- */ -- printk_safe_enter_irqsave(flags); -- lock_failed = down_trylock(&console_sem); -- printk_safe_exit_irqrestore(flags); -- -- if (lock_failed) -+ if (down_trylock(&console_sem)) - return 1; - mutex_acquire(&console_lock_dep_map, 0, 1, ip); - return 0; -@@ -248,13 +236,9 @@ static int __down_trylock_console_sem(un - - static void __up_console_sem(unsigned long ip) - { -- unsigned long flags; -- - mutex_release(&console_lock_dep_map, 1, ip); - -- printk_safe_enter_irqsave(flags); - up(&console_sem); -- printk_safe_exit_irqrestore(flags); - } - #define up_console_sem() __up_console_sem(_RET_IP_) - -@@ -1552,82 +1536,6 @@ static void format_text(struct printk_lo - } - - /* -- * Special console_lock variants that help to reduce the risk of soft-lockups. -- * They allow to pass console_lock to another printk() call using a busy wait. -- */ -- --#ifdef CONFIG_LOCKDEP --static struct lockdep_map console_owner_dep_map = { -- .name = "console_owner" --}; --#endif -- --static DEFINE_RAW_SPINLOCK(console_owner_lock); --static struct task_struct *console_owner; --static bool console_waiter; -- --/** -- * console_lock_spinning_enable - mark beginning of code where another -- * thread might safely busy wait -- * -- * This basically converts console_lock into a spinlock. This marks -- * the section where the console_lock owner can not sleep, because -- * there may be a waiter spinning (like a spinlock). Also it must be -- * ready to hand over the lock at the end of the section. -- */ --static void console_lock_spinning_enable(void) --{ -- raw_spin_lock(&console_owner_lock); -- console_owner = current; -- raw_spin_unlock(&console_owner_lock); -- -- /* The waiter may spin on us after setting console_owner */ -- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); --} -- --/** -- * console_lock_spinning_disable_and_check - mark end of code where another -- * thread was able to busy wait and check if there is a waiter -- * -- * This is called at the end of the section where spinning is allowed. -- * It has two functions. First, it is a signal that it is no longer -- * safe to start busy waiting for the lock. Second, it checks if -- * there is a busy waiter and passes the lock rights to her. -- * -- * Important: Callers lose the lock if there was a busy waiter. -- * They must not touch items synchronized by console_lock -- * in this case. -- * -- * Return: 1 if the lock rights were passed, 0 otherwise. -- */ --static int console_lock_spinning_disable_and_check(void) --{ -- int waiter; -- -- raw_spin_lock(&console_owner_lock); -- waiter = READ_ONCE(console_waiter); -- console_owner = NULL; -- raw_spin_unlock(&console_owner_lock); -- -- if (!waiter) { -- spin_release(&console_owner_dep_map, 1, _THIS_IP_); -- return 0; -- } -- -- /* The waiter is now free to continue */ -- WRITE_ONCE(console_waiter, false); -- -- spin_release(&console_owner_dep_map, 1, _THIS_IP_); -- -- /* -- * Hand off console_lock to waiter. The waiter will perform -- * the up(). After this, the waiter is the console_lock owner. -- */ -- mutex_release(&console_lock_dep_map, 1, _THIS_IP_); -- return 1; --} -- --/* - * Call the console drivers, asking them to write out - * log_buf[start] to log_buf[end - 1]. - * The console_lock must be held. 
-@@ -1889,8 +1797,6 @@ static ssize_t msg_print_ext_header(char - static ssize_t msg_print_ext_body(char *buf, size_t size, - char *dict, size_t dict_len, - char *text, size_t text_len) { return 0; } --static void console_lock_spinning_enable(void) { } --static int console_lock_spinning_disable_and_check(void) { return 0; } - static void call_console_drivers(const char *ext_text, size_t ext_len, - const char *text, size_t len) {} - static size_t msg_print_text(const struct printk_log *msg, bool syslog, -@@ -2125,35 +2031,6 @@ int is_console_locked(void) - { - return console_locked; - } --EXPORT_SYMBOL(is_console_locked); -- --/* -- * Check if we have any console that is capable of printing while cpu is -- * booting or shutting down. Requires console_sem. -- */ --static int have_callable_console(void) --{ -- struct console *con; -- -- for_each_console(con) -- if ((con->flags & CON_ENABLED) && -- (con->flags & CON_ANYTIME)) -- return 1; -- -- return 0; --} -- --/* -- * Can we actually use the console at this time on this cpu? -- * -- * Console drivers may assume that per-cpu resources have been allocated. So -- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't -- * call them until this CPU is officially up. -- */ --static inline int can_use_console(void) --{ -- return cpu_online(raw_smp_processor_id()) || have_callable_console(); --} - - /** - * console_unlock - unlock the console system -@@ -2161,147 +2038,17 @@ static inline int can_use_console(void) - * Releases the console_lock which the caller holds on the console system - * and the console driver list. - * -- * While the console_lock was held, console output may have been buffered -- * by printk(). If this is the case, console_unlock(); emits -- * the output prior to releasing the lock. -- * -- * If there is output waiting, we wake /dev/kmsg and syslog() users. -- * - * console_unlock(); may be called from any context. - */ - void console_unlock(void) - { -- static char ext_text[CONSOLE_EXT_LOG_MAX]; -- static char text[LOG_LINE_MAX + PREFIX_MAX]; -- unsigned long flags; -- bool do_cond_resched, retry; -- - if (console_suspended) { - up_console_sem(); - return; - } - -- /* -- * Console drivers are called with interrupts disabled, so -- * @console_may_schedule should be cleared before; however, we may -- * end up dumping a lot of lines, for example, if called from -- * console registration path, and should invoke cond_resched() -- * between lines if allowable. Not doing so can cause a very long -- * scheduling stall on a slow console leading to RCU stall and -- * softlockup warnings which exacerbate the issue with more -- * messages practically incapacitating the system. -- * -- * console_trylock() is not able to detect the preemptive -- * context reliably. Therefore the value must be stored before -- * and cleared after the the "again" goto label. -- */ -- do_cond_resched = console_may_schedule; --again: -- console_may_schedule = 0; -- -- /* -- * We released the console_sem lock, so we need to recheck if -- * cpu is online and (if not) is there at least one CON_ANYTIME -- * console. 
-- */ -- if (!can_use_console()) { -- console_locked = 0; -- up_console_sem(); -- return; -- } -- -- for (;;) { -- struct printk_log *msg; -- size_t ext_len = 0; -- size_t len; -- -- printk_safe_enter_irqsave(flags); -- raw_spin_lock(&logbuf_lock); -- if (console_seq < log_first_seq) { -- len = sprintf(text, -- "** %llu printk messages dropped **\n", -- log_first_seq - console_seq); -- -- /* messages are gone, move to first one */ -- console_seq = log_first_seq; -- console_idx = log_first_idx; -- } else { -- len = 0; -- } --skip: -- if (console_seq == log_next_seq) -- break; -- -- msg = log_from_idx(console_idx); -- if (suppress_message_printing(msg->level)) { -- /* -- * Skip record we have buffered and already printed -- * directly to the console when we received it, and -- * record that has level above the console loglevel. -- */ -- console_idx = log_next(console_idx); -- console_seq++; -- goto skip; -- } -- -- len += msg_print_text(msg, -- console_msg_format & MSG_FORMAT_SYSLOG, -- printk_time, text + len, sizeof(text) - len); -- if (nr_ext_console_drivers) { -- ext_len = msg_print_ext_header(ext_text, -- sizeof(ext_text), -- msg, console_seq); -- ext_len += msg_print_ext_body(ext_text + ext_len, -- sizeof(ext_text) - ext_len, -- log_dict(msg), msg->dict_len, -- log_text(msg), msg->text_len); -- } -- console_idx = log_next(console_idx); -- console_seq++; -- raw_spin_unlock(&logbuf_lock); -- -- /* -- * While actively printing out messages, if another printk() -- * were to occur on another CPU, it may wait for this one to -- * finish. This task can not be preempted if there is a -- * waiter waiting to take over. -- */ -- console_lock_spinning_enable(); -- -- stop_critical_timings(); /* don't trace print latency */ -- //call_console_drivers(ext_text, ext_len, text, len); -- start_critical_timings(); -- -- if (console_lock_spinning_disable_and_check()) { -- printk_safe_exit_irqrestore(flags); -- return; -- } -- -- printk_safe_exit_irqrestore(flags); -- -- if (do_cond_resched) -- cond_resched(); -- } -- - console_locked = 0; -- -- raw_spin_unlock(&logbuf_lock); -- - up_console_sem(); -- -- /* -- * Someone could have filled up the buffer again, so re-check if there's -- * something to flush. In case we cannot trylock the console_sem again, -- * there's a new owner and the console_unlock() from them will do the -- * flush, no worries. -- */ -- raw_spin_lock(&logbuf_lock); -- retry = console_seq != log_next_seq; -- raw_spin_unlock(&logbuf_lock); -- printk_safe_exit_irqrestore(flags); -- -- if (retry && console_trylock()) -- goto again; - } - EXPORT_SYMBOL(console_unlock); - diff --git a/kernel/patches-5.4.x-rt/0030-0013-printk-track-seq-per-console.patch b/kernel/patches-5.4.x-rt/0030-0013-printk-track-seq-per-console.patch deleted file mode 100644 index 41a6f3d94..000000000 --- a/kernel/patches-5.4.x-rt/0030-0013-printk-track-seq-per-console.patch +++ /dev/null @@ -1,92 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:51 +0100 -Subject: [PATCH 13/25] printk: track seq per console - -Allow each console to track which seq record was last printed. This -simplifies identifying dropped records. 
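
The bookkeeping this enables is small: before a console prints record seq, it compares seq against its own printk_seq and reports any gap as dropped messages. A standalone sketch of the same logic, with invented toy_console/toy_emit names standing in for the kernel structures:

    #include <stdio.h>

    struct toy_console {
        const char *name;
        unsigned long long printk_seq; /* last record this console printed */
    };

    static void toy_emit(struct toy_console *con, unsigned long long seq,
                         const char *text)
    {
        /* A gap between our counter and the record's seq means the
         * ring buffer overwrote records we never printed. */
        if (seq > con->printk_seq + 1)
            printf("%s: ** %llu printk messages dropped **\n",
                   con->name, seq - (con->printk_seq + 1));
        printf("%s: %s\n", con->name, text);
        con->printk_seq = seq;
    }

    int main(void)
    {
        struct toy_console con = { "ttyS0", 0 };

        toy_emit(&con, 1, "boot message");
        toy_emit(&con, 5, "records 2-4 were overwritten before we ran");
        return 0;
    }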
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/console.h | 1 + - kernel/printk/printk.c | 30 +++++++++++++++++++++++++++--- - 2 files changed, 28 insertions(+), 3 deletions(-) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -153,6 +153,7 @@ struct console { - short flags; - short index; - int cflag; -+ unsigned long printk_seq; - void *data; - struct console *next; - }; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1507,6 +1507,16 @@ SYSCALL_DEFINE3(syslog, int, type, char - return do_syslog(type, buf, len, SYSLOG_FROM_READER); - } - -+static void print_console_dropped(struct console *con, u64 count) -+{ -+ char text[64]; -+ int len; -+ -+ len = sprintf(text, "** %llu printk message%s dropped **\n", -+ count, count > 1 ? "s" : ""); -+ con->write(con, text, len); -+} -+ - static void format_text(struct printk_log *msg, u64 seq, - char *ext_text, size_t *ext_len, - char *text, size_t *len, bool time) -@@ -1540,7 +1550,7 @@ static void format_text(struct printk_lo - * log_buf[start] to log_buf[end - 1]. - * The console_lock must be held. - */ --static void call_console_drivers(const char *ext_text, size_t ext_len, -+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, - const char *text, size_t len) - { - struct console *con; -@@ -1558,6 +1568,19 @@ static void call_console_drivers(const c - if (!cpu_online(raw_smp_processor_id()) && - !(con->flags & CON_ANYTIME)) - continue; -+ if (con->printk_seq >= seq) -+ continue; -+ -+ con->printk_seq++; -+ if (con->printk_seq < seq) { -+ print_console_dropped(con, seq - con->printk_seq); -+ con->printk_seq = seq; -+ } -+ -+ /* for supressed messages, only seq is updated */ -+ if (len == 0 && ext_len == 0) -+ continue; -+ - if (con->flags & CON_EXTENDED) - con->write(con, ext_text, ext_len); - else -@@ -1797,7 +1820,7 @@ static ssize_t msg_print_ext_header(char - static ssize_t msg_print_ext_body(char *buf, size_t size, - char *dict, size_t dict_len, - char *text, size_t text_len) { return 0; } --static void call_console_drivers(const char *ext_text, size_t ext_len, -+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, - const char *text, size_t len) {} - static size_t msg_print_text(const struct printk_log *msg, bool syslog, - bool time, char *buf, size_t size) { return 0; } -@@ -2550,8 +2573,9 @@ static int printk_kthread_func(void *dat - &len, printk_time); - - console_lock(); -+ call_console_drivers(master_seq, ext_text, -+ ext_len, text, len); - if (len > 0 || ext_len > 0) { -- call_console_drivers(ext_text, ext_len, text, len); - boot_delay_msec(msg->level); - printk_delay(); - } diff --git a/kernel/patches-5.4.x-rt/0031-0014-printk-do-boot_delay_msec-inside-printk_delay.patch b/kernel/patches-5.4.x-rt/0031-0014-printk-do-boot_delay_msec-inside-printk_delay.patch deleted file mode 100644 index 0a12b5135..000000000 --- a/kernel/patches-5.4.x-rt/0031-0014-printk-do-boot_delay_msec-inside-printk_delay.patch +++ /dev/null @@ -1,71 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:52 +0100 -Subject: [PATCH 14/25] printk: do boot_delay_msec inside printk_delay - -Both functions needed to be called one after the other, so just -integrate boot_delay_msec into printk_delay for simplification. 
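
Sketched from the hunk below, the caller-visible change is that the two delay calls collapse into one:

    /* before: two helpers that always travel together */
    boot_delay_msec(msg->level);
    printk_delay();

    /* after: a single call that takes the level */
    printk_delay(msg->level);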
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 35 +++++++++++++++++------------------ - 1 file changed, 17 insertions(+), 18 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1507,6 +1507,21 @@ SYSCALL_DEFINE3(syslog, int, type, char - return do_syslog(type, buf, len, SYSLOG_FROM_READER); - } - -+int printk_delay_msec __read_mostly; -+ -+static inline void printk_delay(int level) -+{ -+ boot_delay_msec(level); -+ if (unlikely(printk_delay_msec)) { -+ int m = printk_delay_msec; -+ -+ while (m--) { -+ mdelay(1); -+ touch_nmi_watchdog(); -+ } -+ } -+} -+ - static void print_console_dropped(struct console *con, u64 count) - { - char text[64]; -@@ -1588,20 +1603,6 @@ static void call_console_drivers(u64 seq - } - } - --int printk_delay_msec __read_mostly; -- --static inline void printk_delay(void) --{ -- if (unlikely(printk_delay_msec)) { -- int m = printk_delay_msec; -- -- while (m--) { -- mdelay(1); -- touch_nmi_watchdog(); -- } -- } --} -- - static inline u32 printk_caller_id(void) - { - return in_task() ? task_pid_nr(current) : -@@ -2575,10 +2576,8 @@ static int printk_kthread_func(void *dat - console_lock(); - call_console_drivers(master_seq, ext_text, - ext_len, text, len); -- if (len > 0 || ext_len > 0) { -- boot_delay_msec(msg->level); -- printk_delay(); -- } -+ if (len > 0 || ext_len > 0) -+ printk_delay(msg->level); - console_unlock(); - } - diff --git a/kernel/patches-5.4.x-rt/0032-0015-printk-print-history-for-new-consoles.patch b/kernel/patches-5.4.x-rt/0032-0015-printk-print-history-for-new-consoles.patch deleted file mode 100644 index 1b222c55b..000000000 --- a/kernel/patches-5.4.x-rt/0032-0015-printk-print-history-for-new-consoles.patch +++ /dev/null @@ -1,118 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:53 +0100 -Subject: [PATCH 15/25] printk: print history for new consoles - -When new consoles register, they currently print how many messages -they have missed. However, many (or all) of those messages may still -be in the ring buffer. Add functionality to print as much of the -history as available. This is a clean replacement of the old -exclusive console hack. 
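
The replay loop in printk_write_history() (shown in full in the hunk below) follows the usual prb iterator pattern; in outline, with buffer setup and formatting omitted:

    prb_iter_init(&iter, &printk_rb, NULL);
    for (;;) {
        ret = prb_iter_next(&iter, buf, PRINTK_RECORD_MAX, &seq);
        if (ret == 0)
            break;                  /* no more records */
        if (ret < 0) {
            /* writers overtook the iterator; restart from the
             * new oldest record */
            prb_iter_init(&iter, &printk_rb, NULL);
            continue;
        }
        if (seq > master_seq)
            break;                  /* caught up with live output */
        con->write(con, text, len); /* format and emit as usual */
    }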
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/console.h | 1 - kernel/printk/printk.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 76 insertions(+) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -154,6 +154,7 @@ struct console { - short index; - int cflag; - unsigned long printk_seq; -+ int wrote_history; - void *data; - struct console *next; - }; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1560,6 +1560,77 @@ static void format_text(struct printk_lo - } - } - -+static void printk_write_history(struct console *con, u64 master_seq) -+{ -+ struct prb_iterator iter; -+ bool time = printk_time; -+ static char *ext_text; -+ static char *text; -+ static char *buf; -+ u64 seq; -+ -+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); -+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL); -+ buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL); -+ if (!ext_text || !text || !buf) -+ return; -+ -+ if (!(con->flags & CON_ENABLED)) -+ goto out; -+ -+ if (!con->write) -+ goto out; -+ -+ if (!cpu_online(raw_smp_processor_id()) && -+ !(con->flags & CON_ANYTIME)) -+ goto out; -+ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ -+ for (;;) { -+ struct printk_log *msg; -+ size_t ext_len; -+ size_t len; -+ int ret; -+ -+ ret = prb_iter_next(&iter, buf, PRINTK_RECORD_MAX, &seq); -+ if (ret == 0) { -+ break; -+ } else if (ret < 0) { -+ prb_iter_init(&iter, &printk_rb, NULL); -+ continue; -+ } -+ -+ if (seq > master_seq) -+ break; -+ -+ con->printk_seq++; -+ if (con->printk_seq < seq) { -+ print_console_dropped(con, seq - con->printk_seq); -+ con->printk_seq = seq; -+ } -+ -+ msg = (struct printk_log *)buf; -+ format_text(msg, master_seq, ext_text, &ext_len, text, -+ &len, time); -+ -+ if (len == 0 && ext_len == 0) -+ continue; -+ -+ if (con->flags & CON_EXTENDED) -+ con->write(con, ext_text, ext_len); -+ else -+ con->write(con, text, len); -+ -+ printk_delay(msg->level); -+ } -+out: -+ con->wrote_history = 1; -+ kfree(ext_text); -+ kfree(text); -+ kfree(buf); -+} -+ - /* - * Call the console drivers, asking them to write out - * log_buf[start] to log_buf[end - 1]. -@@ -1578,6 +1649,10 @@ static void call_console_drivers(u64 seq - for_each_console(con) { - if (!(con->flags & CON_ENABLED)) - continue; -+ if (!con->wrote_history) { -+ printk_write_history(con, seq); -+ continue; -+ } - if (!con->write) - continue; - if (!cpu_online(raw_smp_processor_id()) && diff --git a/kernel/patches-5.4.x-rt/0033-0016-printk-implement-CON_PRINTBUFFER.patch b/kernel/patches-5.4.x-rt/0033-0016-printk-implement-CON_PRINTBUFFER.patch deleted file mode 100644 index ed15f6624..000000000 --- a/kernel/patches-5.4.x-rt/0033-0016-printk-implement-CON_PRINTBUFFER.patch +++ /dev/null @@ -1,91 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:54 +0100 -Subject: [PATCH 16/25] printk: implement CON_PRINTBUFFER - -If the CON_PRINTBUFFER flag is not set, do not replay the history -for that console. 
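
Driver-side, the flag keeps its familiar meaning: a console that wants the buffered history replayed once at registration time sets it, and leaving it clear now means the console starts at the current record instead. A hypothetical driver, for illustration only:

    static void example_write(struct console *con, const char *s,
                              unsigned int len)
    {
        /* push bytes to the device */
    }

    static struct console example_console = {
        .name  = "example",
        .write = example_write,
        .flags = CON_PRINTBUFFER,   /* replay history on register */
    };

    static int __init example_console_init(void)
    {
        register_console(&example_console);
        return 0;
    }
    console_initcall(example_console_init);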
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 34 ++++++---------------------------- - 1 file changed, 6 insertions(+), 28 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -419,10 +419,6 @@ static u32 log_first_idx; - static u64 log_next_seq; - static u32 log_next_idx; - --/* the next printk record to write to the console */ --static u64 console_seq; --static u32 console_idx; -- - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; - static u32 clear_idx; -@@ -1650,8 +1646,12 @@ static void call_console_drivers(u64 seq - if (!(con->flags & CON_ENABLED)) - continue; - if (!con->wrote_history) { -- printk_write_history(con, seq); -- continue; -+ if (con->flags & CON_PRINTBUFFER) { -+ printk_write_history(con, seq); -+ continue; -+ } -+ con->wrote_history = 1; -+ con->printk_seq = seq - 1; - } - if (!con->write) - continue; -@@ -1881,8 +1881,6 @@ EXPORT_SYMBOL(printk); - - static u64 syslog_seq; - static u32 syslog_idx; --static u64 console_seq; --static u32 console_idx; - static u64 log_first_seq; - static u32 log_first_idx; - static u64 log_next_seq; -@@ -2206,15 +2204,6 @@ void console_flush_on_panic(enum con_flu - */ - console_trylock(); - console_may_schedule = 0; -- -- if (mode == CONSOLE_REPLAY_ALL) { -- unsigned long flags; -- -- logbuf_lock_irqsave(flags); -- console_seq = log_first_seq; -- console_idx = log_first_idx; -- logbuf_unlock_irqrestore(flags); -- } - console_unlock(); - } - -@@ -2293,7 +2282,6 @@ early_param("keep_bootcon", keep_bootcon - void register_console(struct console *newcon) - { - int i; -- unsigned long flags; - struct console *bcon = NULL; - struct console_cmdline *c; - static bool has_preferred; -@@ -2409,16 +2397,6 @@ void register_console(struct console *ne - if (newcon->flags & CON_EXTENDED) - nr_ext_console_drivers++; - -- if (newcon->flags & CON_PRINTBUFFER) { -- /* -- * console_unlock(); will print out the buffered messages -- * for us. -- */ -- logbuf_lock_irqsave(flags); -- console_seq = syslog_seq; -- console_idx = syslog_idx; -- logbuf_unlock_irqrestore(flags); -- } - console_unlock(); - console_sysfs_notify(); - diff --git a/kernel/patches-5.4.x-rt/0034-0017-printk-add-processor-number-to-output.patch b/kernel/patches-5.4.x-rt/0034-0017-printk-add-processor-number-to-output.patch deleted file mode 100644 index d46699c6c..000000000 --- a/kernel/patches-5.4.x-rt/0034-0017-printk-add-processor-number-to-output.patch +++ /dev/null @@ -1,99 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:55 +0100 -Subject: [PATCH 17/25] printk: add processor number to output - -It can be difficult to sort printk out if multiple processors are -printing simultaneously. Add the processor number to the printk -output to allow the messages to be sorted. 
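
With the "%03hu: " field that print_cpu() appends to the record prefix, interleaved output from several CPUs might look roughly like the (invented) lines below, and can be de-interleaved by sorting or filtering on that field:

    [    5.123456] 001: first half of a line from CPU 1
    [    5.123470] 003: CPU 3 interleaving its own message
    [    5.123501] 001: remainder of CPU 1's output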
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 19 +++++++++++++++---- - 1 file changed, 15 insertions(+), 4 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -348,6 +348,7 @@ enum log_flags { - - struct printk_log { - u64 ts_nsec; /* timestamp in nanoseconds */ -+ u16 cpu; /* cpu that generated record */ - u16 len; /* length of entire record */ - u16 text_len; /* length of text buffer */ - u16 dict_len; /* length of dictionary buffer */ -@@ -499,7 +500,7 @@ static u32 log_next(u32 idx) - - /* insert record into the buffer, discard old ones, update heads */ - static int log_store(u32 caller_id, int facility, int level, -- enum log_flags flags, u64 ts_nsec, -+ enum log_flags flags, u64 ts_nsec, u16 cpu, - const char *dict, u16 dict_len, - const char *text, u16 text_len) - { -@@ -533,6 +534,7 @@ static int log_store(u32 caller_id, int - #ifdef CONFIG_PRINTK_CALLER - msg->caller_id = caller_id; - #endif -+ msg->cpu = cpu; - msg->len = size; - - /* insert message */ -@@ -606,9 +608,9 @@ static ssize_t msg_print_ext_header(char - - do_div(ts_usec, 1000); - -- return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", -+ return scnprintf(buf, size, "%u,%llu,%llu,%c%s,%hu;", - (msg->facility << 3) | msg->level, seq, ts_usec, -- msg->flags & LOG_CONT ? 'c' : '-', caller); -+ msg->flags & LOG_CONT ? 'c' : '-', caller, msg->cpu); - } - - static ssize_t msg_print_ext_body(char *buf, size_t size, -@@ -1142,6 +1144,11 @@ static inline void boot_delay_msec(int l - static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); - module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); - -+static size_t print_cpu(u16 cpu, char *buf) -+{ -+ return sprintf(buf, "%03hu: ", cpu); -+} -+ - static size_t print_syslog(unsigned int level, char *buf) - { - return sprintf(buf, "<%u>", level); -@@ -1185,6 +1192,7 @@ static size_t print_prefix(const struct - buf[len++] = ' '; - buf[len] = '\0'; - } -+ len += print_cpu(msg->cpu, buf + len); - - return len; - } -@@ -1760,6 +1768,7 @@ asmlinkage int vprintk_emit(int facility - u64 ts_nsec; - char *text; - char *rbuf; -+ int cpu; - - ts_nsec = local_clock(); - -@@ -1769,6 +1778,8 @@ asmlinkage int vprintk_emit(int facility - return printed_len; - } - -+ cpu = raw_smp_processor_id(); -+ - text = rbuf; - text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args); - -@@ -1803,7 +1814,7 @@ asmlinkage int vprintk_emit(int facility - if (dict) - lflags |= LOG_NEWLINE; - -- printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, -+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu, - dict, dictlen, text, text_len); - - prb_commit(&h); diff --git a/kernel/patches-5.4.x-rt/0035-0018-console-add-write_atomic-interface.patch b/kernel/patches-5.4.x-rt/0035-0018-console-add-write_atomic-interface.patch deleted file mode 100644 index 6de18c0d8..000000000 --- a/kernel/patches-5.4.x-rt/0035-0018-console-add-write_atomic-interface.patch +++ /dev/null @@ -1,64 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:56 +0100 -Subject: [PATCH 18/25] console: add write_atomic interface - -Add a write_atomic callback to the console. This is an optional -function for console drivers. The function must be atomic (including -NMI safe) for writing to the console. - -Console drivers must still implement the write callback. The -write_atomic callback will only be used for emergency messages. 
- -Creating an NMI safe write_atomic that must synchronize with write -requires a careful implementation of the console driver. To aid with -the implementation, a set of console_atomic_* functions are provided: - - void console_atomic_lock(unsigned int *flags); - void console_atomic_unlock(unsigned int flags); - -These functions synchronize using the processor-reentrant cpu lock of -the printk buffer. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/console.h | 4 ++++ - kernel/printk/printk.c | 12 ++++++++++++ - 2 files changed, 16 insertions(+) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -145,6 +145,7 @@ static inline int con_debug_leave(void) - struct console { - char name[16]; - void (*write)(struct console *, const char *, unsigned); -+ void (*write_atomic)(struct console *, const char *, unsigned); - int (*read)(struct console *, char *, unsigned); - struct tty_driver *(*device)(struct console *, int *); - void (*unblank)(void); -@@ -236,4 +237,7 @@ extern void console_init(void); - void dummycon_register_output_notifier(struct notifier_block *nb); - void dummycon_unregister_output_notifier(struct notifier_block *nb); - -+extern void console_atomic_lock(unsigned int *flags); -+extern void console_atomic_unlock(unsigned int flags); -+ - #endif /* _LINUX_CONSOLE_H */ ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3044,3 +3044,15 @@ void kmsg_dump_rewind(struct kmsg_dumper - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - #endif -+ -+void console_atomic_lock(unsigned int *flags) -+{ -+ prb_lock(&printk_cpulock, flags); -+} -+EXPORT_SYMBOL(console_atomic_lock); -+ -+void console_atomic_unlock(unsigned int flags) -+{ -+ prb_unlock(&printk_cpulock, flags); -+} -+EXPORT_SYMBOL(console_atomic_unlock); diff --git a/kernel/patches-5.4.x-rt/0036-0019-printk-introduce-emergency-messages.patch b/kernel/patches-5.4.x-rt/0036-0019-printk-introduce-emergency-messages.patch deleted file mode 100644 index 21e12d3a7..000000000 --- a/kernel/patches-5.4.x-rt/0036-0019-printk-introduce-emergency-messages.patch +++ /dev/null @@ -1,272 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:57 +0100 -Subject: [PATCH 19/25] printk: introduce emergency messages - -Console messages are generally either critical or non-critical. -Critical messages are messages such as crashes or sysrq output. -Critical messages should never be lost because generally they provide -important debugging information. - -Since all console messages are output via a fully preemptible printk -kernel thread, it is possible that messages are not output because -that thread cannot be scheduled (BUG in scheduler, run-away RT task, -etc). - -To allow critical messages to be output independent of the -schedulability of the printk task, introduce an emergency mechanism -that _immediately_ outputs the message to the consoles. To avoid -possible unbounded latency issues, the emergency mechanism only -outputs the printk line provided by the caller and ignores any -pending messages in the log buffer. - -Critical messages are identified as messages (by default) with log -level LOGLEVEL_WARNING or more critical. This is configurable via the -kernel option CONSOLE_LOGLEVEL_EMERGENCY. - -Any messages output as emergency messages are skipped by the printk -thread on those consoles that output the emergency message. - -In order for a console driver to support emergency messages, the -write_atomic function must be implemented by the driver. 
If not -implemented, the emergency messages are handled like all other -messages and are printed by the printk thread. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/printk.h | 2 - kernel/printk/printk.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++--- - lib/Kconfig.debug | 17 +++++++ - 3 files changed, 124 insertions(+), 6 deletions(-) - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -58,6 +58,7 @@ static inline const char *printk_skip_he - */ - #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT - #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET -+#define CONSOLE_LOGLEVEL_EMERGENCY CONFIG_CONSOLE_LOGLEVEL_EMERGENCY - - extern int console_printk[]; - -@@ -65,6 +66,7 @@ extern int console_printk[]; - #define default_message_loglevel (console_printk[1]) - #define minimum_console_loglevel (console_printk[2]) - #define default_console_loglevel (console_printk[3]) -+#define emergency_console_loglevel (console_printk[4]) - - static inline void console_silent(void) - { ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -46,6 +46,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -62,11 +63,12 @@ - #include "braille.h" - #include "internal.h" - --int console_printk[4] = { -+int console_printk[5] = { - CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ - MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ - CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ - CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ -+ CONSOLE_LOGLEVEL_EMERGENCY, /* emergency_console_loglevel */ - }; - EXPORT_SYMBOL_GPL(console_printk); - -@@ -498,6 +500,9 @@ static u32 log_next(u32 idx) - return idx + msg->len; - } - -+static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu, -+ char *text, u16 text_len); -+ - /* insert record into the buffer, discard old ones, update heads */ - static int log_store(u32 caller_id, int facility, int level, - enum log_flags flags, u64 ts_nsec, u16 cpu, -@@ -1641,7 +1646,7 @@ static void printk_write_history(struct - * The console_lock must be held. - */ - static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, -- const char *text, size_t len) -+ const char *text, size_t len, int level) - { - struct console *con; - -@@ -1661,6 +1666,18 @@ static void call_console_drivers(u64 seq - con->wrote_history = 1; - con->printk_seq = seq - 1; - } -+ if (con->write_atomic && level < emergency_console_loglevel) { -+ /* skip emergency messages, already printed */ -+ if (con->printk_seq < seq) -+ con->printk_seq = seq; -+ continue; -+ } -+ if (con->flags & CON_BOOT) { -+ /* skip emergency messages, already printed */ -+ if (con->printk_seq < seq) -+ con->printk_seq = seq; -+ continue; -+ } - if (!con->write) - continue; - if (!cpu_online(raw_smp_processor_id()) && -@@ -1780,8 +1797,12 @@ asmlinkage int vprintk_emit(int facility - - cpu = raw_smp_processor_id(); - -- text = rbuf; -- text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args); -+ /* -+ * If this turns out to be an emergency message, there -+ * may need to be a prefix added. Leave room for it. 
-+ */ -+ text = rbuf + PREFIX_MAX; -+ text_len = vscnprintf(text, PRINTK_SPRINT_MAX - PREFIX_MAX, fmt, args); - - /* strip and flag a trailing newline */ - if (text_len && text[text_len-1] == '\n') { -@@ -1814,6 +1835,14 @@ asmlinkage int vprintk_emit(int facility - if (dict) - lflags |= LOG_NEWLINE; - -+ /* -+ * NOTE: -+ * - rbuf points to beginning of allocated buffer -+ * - text points to beginning of text -+ * - there is room before text for prefix -+ */ -+ printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len); -+ - printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu, - dict, dictlen, text, text_len); - -@@ -1906,7 +1935,7 @@ static ssize_t msg_print_ext_body(char * - char *dict, size_t dict_len, - char *text, size_t text_len) { return 0; } - static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, -- const char *text, size_t len) {} -+ const char *text, size_t len, int level) {} - static size_t msg_print_text(const struct printk_log *msg, bool syslog, - bool time, char *buf, size_t size) { return 0; } - static bool suppress_message_printing(int level) { return false; } -@@ -2639,7 +2668,7 @@ static int printk_kthread_func(void *dat - - console_lock(); - call_console_drivers(master_seq, ext_text, -- ext_len, text, len); -+ ext_len, text, len, msg->level); - if (len > 0 || ext_len > 0) - printk_delay(msg->level); - console_unlock(); -@@ -3043,6 +3072,76 @@ void kmsg_dump_rewind(struct kmsg_dumper - logbuf_unlock_irqrestore(flags); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); -+ -+static bool console_can_emergency(int level) -+{ -+ struct console *con; -+ -+ for_each_console(con) { -+ if (!(con->flags & CON_ENABLED)) -+ continue; -+ if (con->write_atomic && level < emergency_console_loglevel) -+ return true; -+ if (con->write && (con->flags & CON_BOOT)) -+ return true; -+ } -+ return false; -+} -+ -+static void call_emergency_console_drivers(int level, const char *text, -+ size_t text_len) -+{ -+ struct console *con; -+ -+ for_each_console(con) { -+ if (!(con->flags & CON_ENABLED)) -+ continue; -+ if (con->write_atomic && level < emergency_console_loglevel) { -+ con->write_atomic(con, text, text_len); -+ continue; -+ } -+ if (con->write && (con->flags & CON_BOOT)) { -+ con->write(con, text, text_len); -+ continue; -+ } -+ } -+} -+ -+static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu, -+ char *text, u16 text_len) -+{ -+ struct printk_log msg; -+ size_t prefix_len; -+ -+ if (!console_can_emergency(level)) -+ return; -+ -+ msg.level = level; -+ msg.ts_nsec = ts_nsec; -+ msg.cpu = cpu; -+ msg.facility = 0; -+ -+ /* "text" must have PREFIX_MAX preceding bytes available */ -+ -+ prefix_len = print_prefix(&msg, -+ console_msg_format & MSG_FORMAT_SYSLOG, -+ printk_time, buffer); -+ /* move the prefix forward to the beginning of the message text */ -+ text -= prefix_len; -+ memmove(text, buffer, prefix_len); -+ text_len += prefix_len; -+ -+ text[text_len++] = '\n'; -+ -+ call_emergency_console_drivers(level, text, text_len); -+ -+ touch_softlockup_watchdog_sync(); -+ clocksource_touch_watchdog(); -+ rcu_cpu_stall_reset(); -+ touch_nmi_watchdog(); -+ -+ printk_delay(level); -+} - #endif - - void console_atomic_lock(unsigned int *flags) ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -61,6 +61,23 @@ config CONSOLE_LOGLEVEL_QUIET - will be used as the loglevel. 
IOW passing "quiet" will be the - equivalent of passing "loglevel=" - -+config CONSOLE_LOGLEVEL_EMERGENCY -+ int "Emergency console loglevel (1-15)" -+ range 1 15 -+ default "5" -+ help -+ The loglevel to determine if a console message is an emergency -+ message. -+ -+ If supported by the console driver, emergency messages will be -+ flushed to the console immediately. This can cause significant system -+ latencies so the value should be set such that only significant -+ messages are classified as emergency messages. -+ -+ Setting a default here is equivalent to passing in -+ emergency_loglevel= in the kernel bootargs. emergency_loglevel= -+ continues to override whatever value is specified here as well. -+ - config MESSAGE_LOGLEVEL_DEFAULT - int "Default message log level (1-7)" - range 1 7 diff --git a/kernel/patches-5.4.x-rt/0038-0021-printk-implement-KERN_CONT.patch b/kernel/patches-5.4.x-rt/0038-0021-printk-implement-KERN_CONT.patch deleted file mode 100644 index f2c03f9f2..000000000 --- a/kernel/patches-5.4.x-rt/0038-0021-printk-implement-KERN_CONT.patch +++ /dev/null @@ -1,132 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:29:59 +0100 -Subject: [PATCH 21/25] printk: implement KERN_CONT - -Implement KERN_CONT based on the printing CPU rather than on the -printing task. As long as the KERN_CONT messages are coming from the -same CPU and no non-KERN_CONT messages come, the messages are assumed -to belong to each other. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 65 +++++++++++++++++++++++++++---------------------- - 1 file changed, 37 insertions(+), 28 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1709,8 +1709,6 @@ static inline u32 printk_caller_id(void) - 0x80000000 + raw_smp_processor_id(); - } - --/* FIXME: no support for LOG_CONT */ --#if 0 - /* - * Continuation lines are buffered, and not committed to the record buffer - * until the line is complete, or a race forces it. The line fragments -@@ -1721,52 +1719,55 @@ static struct cont { - char buf[LOG_LINE_MAX]; - size_t len; /* length == 0 means unused buffer */ - u32 caller_id; /* printk_caller_id() of first print */ -+ int cpu_owner; /* cpu of first print */ - u64 ts_nsec; /* time of first print */ - u8 level; /* log level of first message */ - u8 facility; /* log facility of first message */ - enum log_flags flags; /* prefix, newline flags */ --} cont; -+} cont[2]; - --static void cont_flush(void) -+static void cont_flush(int ctx) - { -- if (cont.len == 0) -+ struct cont *c = &cont[ctx]; -+ -+ if (c->len == 0) - return; - -- log_store(cont.caller_id, cont.facility, cont.level, cont.flags, -- cont.ts_nsec, NULL, 0, cont.buf, cont.len); -- cont.len = 0; -+ log_store(c->caller_id, c->facility, c->level, c->flags, -+ c->ts_nsec, c->cpu_owner, NULL, 0, c->buf, c->len); -+ c->len = 0; - } - --static bool cont_add(u32 caller_id, int facility, int level, -+static void cont_add(int ctx, int cpu, u32 caller_id, int facility, int level, - enum log_flags flags, const char *text, size_t len) - { -+ struct cont *c = &cont[ctx]; -+ -+ if (cpu != c->cpu_owner || !(flags & LOG_CONT)) -+ cont_flush(ctx); -+ - /* If the line gets too long, split it up in separate records. 
*/ -- if (cont.len + len > sizeof(cont.buf)) { -- cont_flush(); -- return false; -- } -+ while (c->len + len > sizeof(c->buf)) -+ cont_flush(ctx); - -- if (!cont.len) { -- cont.facility = facility; -- cont.level = level; -- cont.caller_id = caller_id; -- cont.ts_nsec = local_clock(); -- cont.flags = flags; -+ if (!c->len) { -+ c->facility = facility; -+ c->level = level; -+ c->caller_id = caller_id; -+ c->ts_nsec = local_clock(); -+ c->flags = flags; -+ c->cpu_owner = cpu; - } - -- memcpy(cont.buf + cont.len, text, len); -- cont.len += len; -+ memcpy(c->buf + c->len, text, len); -+ c->len += len; - - // The original flags come from the first line, - // but later continuations can add a newline. - if (flags & LOG_NEWLINE) { -- cont.flags |= LOG_NEWLINE; -- cont_flush(); -+ c->flags |= LOG_NEWLINE; - } -- -- return true; - } --#endif /* 0 */ - - /* ring buffer used as memory allocator for temporary sprint buffers */ - DECLARE_STATIC_PRINTKRB(sprint_rb, -@@ -1778,6 +1779,7 @@ asmlinkage int vprintk_emit(int facility - const char *fmt, va_list args) - { - const u32 caller_id = printk_caller_id(); -+ int ctx = !!in_nmi(); - enum log_flags lflags = 0; - int printed_len = 0; - struct prb_handle h; -@@ -1843,8 +1845,15 @@ asmlinkage int vprintk_emit(int facility - */ - printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len); - -- printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu, -- dict, dictlen, text, text_len); -+ if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) { -+ cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len); -+ printed_len = text_len; -+ } else { -+ if (cpu == cont[ctx].cpu_owner) -+ cont_flush(ctx); -+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu, -+ dict, dictlen, text, text_len); -+ } - - prb_commit(&h); - return printed_len; diff --git a/kernel/patches-5.4.x-rt/0039-0022-printk-implement-dev-kmsg.patch b/kernel/patches-5.4.x-rt/0039-0022-printk-implement-dev-kmsg.patch deleted file mode 100644 index 411044a17..000000000 --- a/kernel/patches-5.4.x-rt/0039-0022-printk-implement-dev-kmsg.patch +++ /dev/null @@ -1,304 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:30:00 +0100 -Subject: [PATCH 22/25] printk: implement /dev/kmsg - -Since printk messages are now logged to a new ring buffer, update -the /dev/kmsg functions to pull the messages from there. 
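For reference (sketch only, not part of this patch), the user-visible
/dev/kmsg contract is unchanged by the conversion: a non-blocking reader
gets one record per read(), -EAGAIN once it has caught up, and -EPIPE
after being overrun:

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[8192];
		int fd = open("/dev/kmsg", O_RDONLY | O_NONBLOCK);

		if (fd < 0)
			return 1;
		for (;;) {
			/* each successful read() returns exactly one record */
			ssize_t n = read(fd, buf, sizeof(buf));

			if (n < 0 && errno == EAGAIN)
				break;		/* caught up with the ring */
			if (n < 0 && errno == EPIPE)
				continue;	/* overrun; next read resumes */
			if (n <= 0)
				break;
			fwrite(buf, 1, n, stdout);
		}
		close(fd);
		return 0;
	}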
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - fs/proc/kmsg.c | 4 - - include/linux/printk.h | 1 - kernel/printk/printk.c | 162 +++++++++++++++++++++++++++++++++---------------- - 3 files changed, 113 insertions(+), 54 deletions(-) - ---- a/fs/proc/kmsg.c -+++ b/fs/proc/kmsg.c -@@ -18,8 +18,6 @@ - #include - #include - --extern wait_queue_head_t log_wait; -- - static int kmsg_open(struct inode * inode, struct file * file) - { - return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC); -@@ -42,7 +40,7 @@ static ssize_t kmsg_read(struct file *fi - - static __poll_t kmsg_poll(struct file *file, poll_table *wait) - { -- poll_wait(file, &log_wait, wait); -+ poll_wait(file, printk_wait_queue(), wait); - if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC)) - return EPOLLIN | EPOLLRDNORM; - return 0; ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -192,6 +192,7 @@ void __init setup_log_buf(int early); - void dump_stack_print_info(const char *log_lvl); - void show_regs_print_info(const char *log_lvl); - extern asmlinkage void dump_stack(void) __cold; -+struct wait_queue_head *printk_wait_queue(void); - #else - static inline __printf(1, 0) - int vprintk(const char *s, va_list args) ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -673,10 +673,11 @@ static ssize_t msg_print_ext_body(char * - /* /dev/kmsg - userspace message inject/listen interface */ - struct devkmsg_user { - u64 seq; -- u32 idx; -+ struct prb_iterator iter; - struct ratelimit_state rs; - struct mutex lock; - char buf[CONSOLE_EXT_LOG_MAX]; -+ char msgbuf[PRINTK_RECORD_MAX]; - }; - - static __printf(3, 4) __cold -@@ -759,9 +760,11 @@ static ssize_t devkmsg_read(struct file - size_t count, loff_t *ppos) - { - struct devkmsg_user *user = file->private_data; -+ struct prb_iterator backup_iter; - struct printk_log *msg; -- size_t len; - ssize_t ret; -+ size_t len; -+ u64 seq; - - if (!user) - return -EBADF; -@@ -770,52 +773,67 @@ static ssize_t devkmsg_read(struct file - if (ret) - return ret; - -- logbuf_lock_irq(); -- while (user->seq == log_next_seq) { -- if (file->f_flags & O_NONBLOCK) { -- ret = -EAGAIN; -- logbuf_unlock_irq(); -- goto out; -- } -+ /* make a backup copy in case there is a problem */ -+ prb_iter_copy(&backup_iter, &user->iter); - -- logbuf_unlock_irq(); -- ret = wait_event_interruptible(log_wait, -- user->seq != log_next_seq); -- if (ret) -- goto out; -- logbuf_lock_irq(); -+ if (file->f_flags & O_NONBLOCK) { -+ ret = prb_iter_next(&user->iter, &user->msgbuf[0], -+ sizeof(user->msgbuf), &seq); -+ } else { -+ ret = prb_iter_wait_next(&user->iter, &user->msgbuf[0], -+ sizeof(user->msgbuf), &seq); - } -- -- if (user->seq < log_first_seq) { -- /* our last seen message is gone, return error and reset */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ if (ret == 0) { -+ /* end of list */ -+ ret = -EAGAIN; -+ goto out; -+ } else if (ret == -EINVAL) { -+ /* iterator invalid, return error and reset */ - ret = -EPIPE; -- logbuf_unlock_irq(); -+ prb_iter_init(&user->iter, &printk_rb, &user->seq); -+ goto out; -+ } else if (ret < 0) { -+ /* interrupted by signal */ - goto out; - } - -- msg = log_from_idx(user->idx); -+ if (user->seq == 0) { -+ user->seq = seq; -+ } else { -+ user->seq++; -+ if (user->seq < seq) { -+ ret = -EPIPE; -+ goto restore_out; -+ } -+ } -+ -+ msg = (struct printk_log *)&user->msgbuf[0]; - len = msg_print_ext_header(user->buf, sizeof(user->buf), - msg, user->seq); - len += msg_print_ext_body(user->buf + len, 
sizeof(user->buf) - len, - log_dict(msg), msg->dict_len, - log_text(msg), msg->text_len); - -- user->idx = log_next(user->idx); -- user->seq++; -- logbuf_unlock_irq(); -- - if (len > count) { - ret = -EINVAL; -- goto out; -+ goto restore_out; - } - - if (copy_to_user(buf, user->buf, len)) { - ret = -EFAULT; -- goto out; -+ goto restore_out; - } -+ - ret = len; -+ goto out; -+restore_out: -+ /* -+ * There was an error, but this message should not be -+ * lost because of it. Restore the backup and setup -+ * seq so that it will work with the next read. -+ */ -+ prb_iter_copy(&user->iter, &backup_iter); -+ user->seq = seq - 1; - out: - mutex_unlock(&user->lock); - return ret; -@@ -824,19 +842,21 @@ static ssize_t devkmsg_read(struct file - static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) - { - struct devkmsg_user *user = file->private_data; -- loff_t ret = 0; -+ loff_t ret; - - if (!user) - return -EBADF; - if (offset) - return -ESPIPE; - -- logbuf_lock_irq(); -+ ret = mutex_lock_interruptible(&user->lock); -+ if (ret) -+ return ret; -+ - switch (whence) { - case SEEK_SET: - /* the first record */ -- user->idx = log_first_idx; -- user->seq = log_first_seq; -+ prb_iter_init(&user->iter, &printk_rb, &user->seq); - break; - case SEEK_DATA: - /* -@@ -844,40 +864,83 @@ static loff_t devkmsg_llseek(struct file - * like issued by 'dmesg -c'. Reading /dev/kmsg itself - * changes no global state, and does not clear anything. - */ -- user->idx = clear_idx; -- user->seq = clear_seq; -+ for (;;) { -+ prb_iter_init(&user->iter, &printk_rb, NULL); -+ ret = prb_iter_seek(&user->iter, clear_seq); -+ if (ret > 0) { -+ /* seeked to clear seq */ -+ user->seq = clear_seq; -+ break; -+ } else if (ret == 0) { -+ /* -+ * The end of the list was hit without -+ * ever seeing the clear seq. Just -+ * seek to the beginning of the list. -+ */ -+ prb_iter_init(&user->iter, &printk_rb, -+ &user->seq); -+ break; -+ } -+ /* iterator invalid, start over */ -+ } -+ ret = 0; - break; - case SEEK_END: - /* after the last record */ -- user->idx = log_next_idx; -- user->seq = log_next_seq; -+ for (;;) { -+ ret = prb_iter_next(&user->iter, NULL, 0, &user->seq); -+ if (ret == 0) -+ break; -+ else if (ret > 0) -+ continue; -+ /* iterator invalid, start over */ -+ prb_iter_init(&user->iter, &printk_rb, &user->seq); -+ } -+ ret = 0; - break; - default: - ret = -EINVAL; - } -- logbuf_unlock_irq(); -+ -+ mutex_unlock(&user->lock); - return ret; - } - -+struct wait_queue_head *printk_wait_queue(void) -+{ -+ /* FIXME: using prb internals! 
*/ -+ return printk_rb.wq; -+} -+ - static __poll_t devkmsg_poll(struct file *file, poll_table *wait) - { - struct devkmsg_user *user = file->private_data; -+ struct prb_iterator iter; - __poll_t ret = 0; -+ int rbret; -+ u64 seq; - - if (!user) - return EPOLLERR|EPOLLNVAL; - -- poll_wait(file, &log_wait, wait); -+ poll_wait(file, printk_wait_queue(), wait); - -- logbuf_lock_irq(); -- if (user->seq < log_next_seq) { -- /* return error when data has vanished underneath us */ -- if (user->seq < log_first_seq) -- ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; -- else -- ret = EPOLLIN|EPOLLRDNORM; -- } -- logbuf_unlock_irq(); -+ mutex_lock(&user->lock); -+ -+ /* use copy so no actual iteration takes place */ -+ prb_iter_copy(&iter, &user->iter); -+ -+ rbret = prb_iter_next(&iter, &user->msgbuf[0], -+ sizeof(user->msgbuf), &seq); -+ if (rbret == 0) -+ goto out; -+ -+ ret = EPOLLIN|EPOLLRDNORM; -+ -+ if (rbret < 0 || (seq - user->seq) != 1) -+ ret |= EPOLLERR|EPOLLPRI; -+out: -+ mutex_unlock(&user->lock); - - return ret; - } -@@ -907,10 +970,7 @@ static int devkmsg_open(struct inode *in - - mutex_init(&user->lock); - -- logbuf_lock_irq(); -- user->idx = log_first_idx; -- user->seq = log_first_seq; -- logbuf_unlock_irq(); -+ prb_iter_init(&user->iter, &printk_rb, &user->seq); - - file->private_data = user; - return 0; diff --git a/kernel/patches-5.4.x-rt/0040-0023-printk-implement-syslog.patch b/kernel/patches-5.4.x-rt/0040-0023-printk-implement-syslog.patch deleted file mode 100644 index c5af3128e..000000000 --- a/kernel/patches-5.4.x-rt/0040-0023-printk-implement-syslog.patch +++ /dev/null @@ -1,493 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:30:01 +0100 -Subject: [PATCH 23/25] printk: implement syslog - -Since printk messages are now logged to a new ring buffer, update -the syslog functions to pull the messages from there. 
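For context (illustration only, not part of this patch), the syslog(2)
interface being reimplemented here is what klogctl(3) drives from user
space; a minimal non-destructive dump of the whole buffer looks like:

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/klog.h>

	int main(void)
	{
		/* 10 == SYSLOG_ACTION_SIZE_BUFFER, 3 == SYSLOG_ACTION_READ_ALL */
		int len = klogctl(10, NULL, 0);
		char *buf;

		if (len <= 0)
			return 1;
		buf = malloc(len);
		if (!buf)
			return 1;
		len = klogctl(3, buf, len);
		if (len > 0)
			fwrite(buf, 1, len, stdout);
		free(buf);
		return 0;
	}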
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 342 +++++++++++++++++++++++++++++++++---------------- - 1 file changed, 236 insertions(+), 106 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -407,10 +407,12 @@ DECLARE_STATIC_PRINTKRB_CPULOCK(printk_c - /* record buffer */ - DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock); - -+static DEFINE_MUTEX(syslog_lock); -+DECLARE_STATIC_PRINTKRB_ITER(syslog_iter, &printk_rb); -+ - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* the next printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; --static u32 syslog_idx; - static size_t syslog_partial; - static bool syslog_time; - -@@ -1303,30 +1305,42 @@ static size_t msg_print_text(const struc - return len; - } - --static int syslog_print(char __user *buf, int size) -+static int syslog_print(char __user *buf, int size, char *text, -+ char *msgbuf, int *locked) - { -- char *text; -+ struct prb_iterator iter; - struct printk_log *msg; - int len = 0; -- -- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -- if (!text) -- return -ENOMEM; -+ u64 seq; -+ int ret; - - while (size > 0) { - size_t n; - size_t skip; - -- logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -- /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -- syslog_partial = 0; -+ for (;;) { -+ prb_iter_copy(&iter, &syslog_iter); -+ ret = prb_iter_next(&iter, msgbuf, -+ PRINTK_RECORD_MAX, &seq); -+ if (ret < 0) { -+ /* messages are gone, move to first one */ -+ prb_iter_init(&syslog_iter, &printk_rb, -+ &syslog_seq); -+ syslog_partial = 0; -+ continue; -+ } -+ break; - } -- if (syslog_seq == log_next_seq) { -- logbuf_unlock_irq(); -+ if (ret == 0) - break; -+ -+ /* -+ * If messages have been missed, the partial tracker -+ * is no longer valid and must be reset. 
-+ */ -+ if (syslog_seq > 0 && seq - 1 != syslog_seq) { -+ syslog_seq = seq - 1; -+ syslog_partial = 0; - } - - /* -@@ -1336,131 +1350,212 @@ static int syslog_print(char __user *buf - if (!syslog_partial) - syslog_time = printk_time; - -+ msg = (struct printk_log *)msgbuf; -+ - skip = syslog_partial; -- msg = log_from_idx(syslog_idx); - n = msg_print_text(msg, true, syslog_time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ PRINTK_SPRINT_MAX); - if (n - syslog_partial <= size) { - /* message fits into buffer, move forward */ -- syslog_idx = log_next(syslog_idx); -- syslog_seq++; -+ prb_iter_next(&syslog_iter, NULL, 0, &syslog_seq); - n -= syslog_partial; - syslog_partial = 0; -- } else if (!len){ -+ } else if (!len) { - /* partial read(), remember position */ - n = size; - syslog_partial += n; - } else - n = 0; -- logbuf_unlock_irq(); - - if (!n) - break; - -+ mutex_unlock(&syslog_lock); - if (copy_to_user(buf, text + skip, n)) { - if (!len) - len = -EFAULT; -+ *locked = 0; - break; - } -+ ret = mutex_lock_interruptible(&syslog_lock); - - len += n; - size -= n; - buf += n; -+ -+ if (ret) { -+ if (!len) -+ len = ret; -+ *locked = 0; -+ break; -+ } - } - -- kfree(text); - return len; - } - --static int syslog_print_all(char __user *buf, int size, bool clear) -+static int count_remaining(struct prb_iterator *iter, u64 until_seq, -+ char *msgbuf, int size, bool records, bool time) - { -- char *text; -+ struct prb_iterator local_iter; -+ struct printk_log *msg; - int len = 0; -- u64 next_seq; - u64 seq; -- u32 idx; -+ int ret; -+ -+ prb_iter_copy(&local_iter, iter); -+ for (;;) { -+ ret = prb_iter_next(&local_iter, msgbuf, size, &seq); -+ if (ret == 0) { -+ break; -+ } else if (ret < 0) { -+ /* the iter is invalid, restart from head */ -+ prb_iter_init(&local_iter, &printk_rb, NULL); -+ len = 0; -+ continue; -+ } -+ -+ if (until_seq && seq >= until_seq) -+ break; -+ -+ if (records) { -+ len++; -+ } else { -+ msg = (struct printk_log *)msgbuf; -+ len += msg_print_text(msg, true, time, NULL, 0); -+ } -+ } -+ -+ return len; -+} -+ -+static void syslog_clear(void) -+{ -+ struct prb_iterator iter; -+ int ret; -+ -+ prb_iter_init(&iter, &printk_rb, &clear_seq); -+ for (;;) { -+ ret = prb_iter_next(&iter, NULL, 0, &clear_seq); -+ if (ret == 0) -+ break; -+ else if (ret < 0) -+ prb_iter_init(&iter, &printk_rb, &clear_seq); -+ } -+} -+ -+static int syslog_print_all(char __user *buf, int size, bool clear) -+{ -+ struct prb_iterator iter; -+ struct printk_log *msg; -+ char *msgbuf = NULL; -+ char *text = NULL; -+ int textlen; -+ u64 seq = 0; -+ int len = 0; - bool time; -+ int ret; - -- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); -+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL); - if (!text) - return -ENOMEM; -+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL); -+ if (!msgbuf) { -+ kfree(text); -+ return -ENOMEM; -+ } - - time = printk_time; -- logbuf_lock_irq(); -+ - /* -- * Find first record that fits, including all following records, -- * into the user-provided buffer for this dump. -+ * Setup iter to last event before clear. Clear may -+ * be lost, but keep going with a best effort. 
- */ -- seq = clear_seq; -- idx = clear_idx; -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- len += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -- } -+ prb_iter_init(&iter, &printk_rb, NULL); -+ prb_iter_seek(&iter, clear_seq); - -- /* move first record forward until length fits into the buffer */ -- seq = clear_seq; -- idx = clear_idx; -- while (len > size && seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ /* count the total bytes after clear */ -+ len = count_remaining(&iter, 0, msgbuf, PRINTK_RECORD_MAX, -+ false, time); -+ -+ /* move iter forward until length fits into the buffer */ -+ while (len > size) { -+ ret = prb_iter_next(&iter, msgbuf, -+ PRINTK_RECORD_MAX, &seq); -+ if (ret == 0) { -+ break; -+ } else if (ret < 0) { -+ /* -+ * The iter is now invalid so clear will -+ * also be invalid. Restart from the head. -+ */ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ len = count_remaining(&iter, 0, msgbuf, -+ PRINTK_RECORD_MAX, false, time); -+ continue; -+ } - -+ msg = (struct printk_log *)msgbuf; - len -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -- } - -- /* last message fitting into this dump */ -- next_seq = log_next_seq; -+ if (clear) -+ clear_seq = seq; -+ } - -+ /* copy messages to buffer */ - len = 0; -- while (len >= 0 && seq < next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- int textlen = msg_print_text(msg, true, time, text, -- LOG_LINE_MAX + PREFIX_MAX); -+ while (len >= 0 && len < size) { -+ if (clear) -+ clear_seq = seq; - -- idx = log_next(idx); -- seq++; -+ ret = prb_iter_next(&iter, msgbuf, -+ PRINTK_RECORD_MAX, &seq); -+ if (ret == 0) { -+ break; -+ } else if (ret < 0) { -+ /* -+ * The iter is now invalid. Make a best -+ * effort to grab the rest of the log -+ * from the new head. 
-+ */ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ continue; -+ } -+ -+ msg = (struct printk_log *)msgbuf; -+ textlen = msg_print_text(msg, true, time, text, -+ PRINTK_SPRINT_MAX); -+ if (textlen < 0) { -+ len = textlen; -+ break; -+ } - -- logbuf_unlock_irq(); - if (copy_to_user(buf + len, text, textlen)) - len = -EFAULT; - else - len += textlen; -- logbuf_lock_irq(); -- -- if (seq < log_first_seq) { -- /* messages are gone, move to next one */ -- seq = log_first_seq; -- idx = log_first_idx; -- } - } - -- if (clear) { -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; -- } -- logbuf_unlock_irq(); -+ if (clear && !seq) -+ syslog_clear(); - -- kfree(text); -+ if (text) -+ kfree(text); -+ if (msgbuf) -+ kfree(msgbuf); - return len; - } - --static void syslog_clear(void) --{ -- logbuf_lock_irq(); -- clear_seq = log_next_seq; -- clear_idx = log_next_idx; -- logbuf_unlock_irq(); --} -- - int do_syslog(int type, char __user *buf, int len, int source) - { - bool clear = false; - static int saved_console_loglevel = LOGLEVEL_DEFAULT; -+ struct prb_iterator iter; -+ char *msgbuf = NULL; -+ char *text = NULL; -+ int locked; - int error; -+ int ret; - - error = check_syslog_permissions(type, source); - if (error) -@@ -1478,11 +1573,49 @@ int do_syslog(int type, char __user *buf - return 0; - if (!access_ok(buf, len)) - return -EFAULT; -- error = wait_event_interruptible(log_wait, -- syslog_seq != log_next_seq); -+ -+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL); -+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL); -+ if (!text || !msgbuf) { -+ error = -ENOMEM; -+ goto out; -+ } -+ -+ error = mutex_lock_interruptible(&syslog_lock); - if (error) -- return error; -- error = syslog_print(buf, len); -+ goto out; -+ -+ /* -+ * Wait until a first message is available. Use a copy -+ * because no iteration should occur for syslog now. -+ */ -+ for (;;) { -+ prb_iter_copy(&iter, &syslog_iter); -+ -+ mutex_unlock(&syslog_lock); -+ ret = prb_iter_wait_next(&iter, NULL, 0, NULL); -+ if (ret == -ERESTARTSYS) { -+ error = ret; -+ goto out; -+ } -+ error = mutex_lock_interruptible(&syslog_lock); -+ if (error) -+ goto out; -+ -+ if (ret == -EINVAL) { -+ prb_iter_init(&syslog_iter, &printk_rb, -+ &syslog_seq); -+ syslog_partial = 0; -+ continue; -+ } -+ break; -+ } -+ -+ /* print as much as will fit in the user buffer */ -+ locked = 1; -+ error = syslog_print(buf, len, text, msgbuf, &locked); -+ if (locked) -+ mutex_unlock(&syslog_lock); - break; - /* Read/clear last kernel messages */ - case SYSLOG_ACTION_READ_CLEAR: -@@ -1527,47 +1660,45 @@ int do_syslog(int type, char __user *buf - break; - /* Number of chars in the log buffer */ - case SYSLOG_ACTION_SIZE_UNREAD: -- logbuf_lock_irq(); -- if (syslog_seq < log_first_seq) { -- /* messages are gone, move to first one */ -- syslog_seq = log_first_seq; -- syslog_idx = log_first_idx; -- syslog_partial = 0; -- } -+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL); -+ if (!msgbuf) -+ return -ENOMEM; -+ -+ error = mutex_lock_interruptible(&syslog_lock); -+ if (error) -+ goto out; -+ - if (source == SYSLOG_FROM_PROC) { - /* - * Short-cut for poll(/"proc/kmsg") which simply checks - * for pending data, not the size; return the count of - * records, not the length. - */ -- error = log_next_seq - syslog_seq; -+ error = count_remaining(&syslog_iter, 0, msgbuf, -+ PRINTK_RECORD_MAX, true, -+ printk_time); - } else { -- u64 seq = syslog_seq; -- u32 idx = syslog_idx; -- bool time = syslog_partial ? 
syslog_time : printk_time; -- -- while (seq < log_next_seq) { -- struct printk_log *msg = log_from_idx(idx); -- -- error += msg_print_text(msg, true, time, NULL, -- 0); -- time = printk_time; -- idx = log_next(idx); -- seq++; -- } -+ error = count_remaining(&syslog_iter, 0, msgbuf, -+ PRINTK_RECORD_MAX, false, -+ printk_time); - error -= syslog_partial; - } -- logbuf_unlock_irq(); -+ -+ mutex_unlock(&syslog_lock); - break; - /* Size of the log buffer */ - case SYSLOG_ACTION_SIZE_BUFFER: -- error = log_buf_len; -+ error = prb_buffer_size(&printk_rb); - break; - default: - error = -EINVAL; - break; - } -- -+out: -+ if (msgbuf) -+ kfree(msgbuf); -+ if (text) -+ kfree(text); - return error; - } - -@@ -1989,7 +2120,6 @@ EXPORT_SYMBOL(printk); - #define printk_time false - - static u64 syslog_seq; --static u32 syslog_idx; - static u64 log_first_seq; - static u32 log_first_idx; - static u64 log_next_seq; diff --git a/kernel/patches-5.4.x-rt/0041-0024-printk-implement-kmsg_dump.patch b/kernel/patches-5.4.x-rt/0041-0024-printk-implement-kmsg_dump.patch deleted file mode 100644 index 4de007640..000000000 --- a/kernel/patches-5.4.x-rt/0041-0024-printk-implement-kmsg_dump.patch +++ /dev/null @@ -1,397 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:30:02 +0100 -Subject: [PATCH 24/25] printk: implement kmsg_dump - -Since printk messages are now logged to a new ring buffer, update -the kmsg_dump functions to pull the messages from there. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/kmsg_dump.h | 6 - - kernel/printk/printk.c | 258 ++++++++++++++++++++++++---------------------- - 2 files changed, 139 insertions(+), 125 deletions(-) - ---- a/include/linux/kmsg_dump.h -+++ b/include/linux/kmsg_dump.h -@@ -46,10 +46,8 @@ struct kmsg_dumper { - bool registered; - - /* private state of the kmsg iterator */ -- u32 cur_idx; -- u32 next_idx; -- u64 cur_seq; -- u64 next_seq; -+ u64 line_seq; -+ u64 buffer_end_seq; - }; - - #ifdef CONFIG_PRINTK ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -417,13 +417,13 @@ static size_t syslog_partial; - static bool syslog_time; - - /* index and sequence number of the first record stored in the buffer */ --static u64 log_first_seq; - static u32 log_first_idx; - - /* index and sequence number of the next record to store in the buffer */ --static u64 log_next_seq; - static u32 log_next_idx; - -+static DEFINE_MUTEX(kmsg_dump_lock); -+ - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; - static u32 clear_idx; -@@ -470,38 +470,6 @@ static char *log_dict(const struct print - return (char *)msg + sizeof(struct printk_log) + msg->text_len; - } - --/* get record by index; idx must point to valid msg */ --static struct printk_log *log_from_idx(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer. -- */ -- if (!msg->len) -- return (struct printk_log *)log_buf; -- return msg; --} -- --/* get next record; idx must point to valid msg */ --static u32 log_next(u32 idx) --{ -- struct printk_log *msg = (struct printk_log *)(log_buf + idx); -- -- /* length == 0 indicates the end of the buffer; wrap */ -- /* -- * A length == 0 record is the end of buffer marker. Wrap around and -- * read the message at the start of the buffer as *this* one, and -- * return the one after that. 
-- */ -- if (!msg->len) { -- msg = (struct printk_log *)log_buf; -- return msg->len; -- } -- return idx + msg->len; --} -- - static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu, - char *text, u16 text_len); - -@@ -2120,9 +2088,7 @@ EXPORT_SYMBOL(printk); - #define printk_time false - - static u64 syslog_seq; --static u64 log_first_seq; - static u32 log_first_idx; --static u64 log_next_seq; - static char *log_text(const struct printk_log *msg) { return NULL; } - static char *log_dict(const struct printk_log *msg) { return NULL; } - static struct printk_log *log_from_idx(u32 idx) { return NULL; } -@@ -3032,7 +2998,6 @@ module_param_named(always_kmsg_dump, alw - void kmsg_dump(enum kmsg_dump_reason reason) - { - struct kmsg_dumper *dumper; -- unsigned long flags; - - if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) - return; -@@ -3045,12 +3010,7 @@ void kmsg_dump(enum kmsg_dump_reason rea - /* initialize iterator with data about the stored records */ - dumper->active = true; - -- logbuf_lock_irqsave(flags); -- dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; -- logbuf_unlock_irqrestore(flags); -+ kmsg_dump_rewind(dumper); - - /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); -@@ -3083,33 +3043,67 @@ void kmsg_dump(enum kmsg_dump_reason rea - bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) - { -+ struct prb_iterator iter; - struct printk_log *msg; -- size_t l = 0; -- bool ret = false; -+ struct prb_handle h; -+ bool cont = false; -+ char *msgbuf; -+ char *rbuf; -+ size_t l; -+ u64 seq; -+ int ret; - - if (!dumper->active) -- goto out; -+ return cont; -+ -+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_RECORD_MAX); -+ if (!rbuf) -+ return cont; -+ msgbuf = rbuf; -+retry: -+ for (;;) { -+ prb_iter_init(&iter, &printk_rb, &seq); -+ -+ if (dumper->line_seq == seq) { -+ /* already where we want to be */ -+ break; -+ } else if (dumper->line_seq < seq) { -+ /* messages are gone, move to first available one */ -+ dumper->line_seq = seq; -+ break; -+ } - -- if (dumper->cur_seq < log_first_seq) { -- /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -+ ret = prb_iter_seek(&iter, dumper->line_seq); -+ if (ret > 0) { -+ /* seeked to line_seq */ -+ break; -+ } else if (ret == 0) { -+ /* -+ * The end of the list was hit without ever seeing -+ * line_seq. Reset it to the beginning of the list. 
-+ */ -+ prb_iter_init(&iter, &printk_rb, &dumper->line_seq); -+ break; -+ } -+ /* iterator invalid, start over */ - } - -- /* last entry */ -- if (dumper->cur_seq >= log_next_seq) -+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, -+ &dumper->line_seq); -+ if (ret == 0) - goto out; -+ else if (ret < 0) -+ goto retry; - -- msg = log_from_idx(dumper->cur_idx); -+ msg = (struct printk_log *)msgbuf; - l = msg_print_text(msg, syslog, printk_time, line, size); - -- dumper->cur_idx = log_next(dumper->cur_idx); -- dumper->cur_seq++; -- ret = true; --out: - if (len) - *len = l; -- return ret; -+ cont = true; -+out: -+ prb_commit(&h); -+ return cont; - } - - /** -@@ -3132,12 +3126,11 @@ bool kmsg_dump_get_line_nolock(struct km - bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) - { -- unsigned long flags; - bool ret; - -- logbuf_lock_irqsave(flags); -+ mutex_lock(&kmsg_dump_lock); - ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); -- logbuf_unlock_irqrestore(flags); -+ mutex_unlock(&kmsg_dump_lock); - - return ret; - } -@@ -3165,74 +3158,101 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line); - bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) - { -- unsigned long flags; -- u64 seq; -- u32 idx; -- u64 next_seq; -- u32 next_idx; -- size_t l = 0; -- bool ret = false; -+ struct prb_iterator iter; - bool time = printk_time; -+ struct printk_log *msg; -+ u64 new_end_seq = 0; -+ struct prb_handle h; -+ bool cont = false; -+ char *msgbuf; -+ u64 end_seq; -+ int textlen; -+ u64 seq = 0; -+ char *rbuf; -+ int l = 0; -+ int ret; - - if (!dumper->active) -- goto out; -+ return cont; - -- logbuf_lock_irqsave(flags); -- if (dumper->cur_seq < log_first_seq) { -- /* messages are gone, move to first available one */ -- dumper->cur_seq = log_first_seq; -- dumper->cur_idx = log_first_idx; -- } -+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_RECORD_MAX); -+ if (!rbuf) -+ return cont; -+ msgbuf = rbuf; - -- /* last entry */ -- if (dumper->cur_seq >= dumper->next_seq) { -- logbuf_unlock_irqrestore(flags); -- goto out; -- } -- -- /* calculate length of entire buffer */ -- seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ prb_iter_init(&iter, &printk_rb, NULL); - -- l += msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ /* -+ * seek to the start record, which is set/modified -+ * by kmsg_dump_get_line_nolock() -+ */ -+ ret = prb_iter_seek(&iter, dumper->line_seq); -+ if (ret <= 0) -+ prb_iter_init(&iter, &printk_rb, &seq); -+ -+ /* work with a local end seq to have a constant value */ -+ end_seq = dumper->buffer_end_seq; -+ if (!end_seq) { -+ /* initialize end seq to "infinity" */ -+ end_seq = -1; -+ dumper->buffer_end_seq = end_seq; - } -+retry: -+ if (seq >= end_seq) -+ goto out; - -- /* move first record forward until length fits into the buffer */ -- seq = dumper->cur_seq; -- idx = dumper->cur_idx; -- while (l >= size && seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ /* count the total bytes after seq */ -+ textlen = count_remaining(&iter, end_seq, msgbuf, -+ PRINTK_RECORD_MAX, 0, time); - -- l -= msg_print_text(msg, true, time, NULL, 0); -- idx = log_next(idx); -- seq++; -+ /* move iter forward until length fits into the buffer */ -+ while (textlen > size) { -+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq); -+ if (ret == 0) { -+ break; -+ } 
else if (ret < 0) { -+ prb_iter_init(&iter, &printk_rb, &seq); -+ goto retry; -+ } -+ -+ msg = (struct printk_log *)msgbuf; -+ textlen -= msg_print_text(msg, true, time, NULL, 0); - } - -- /* last message in next interation */ -- next_seq = seq; -- next_idx = idx; -+ /* save end seq for the next interation */ -+ new_end_seq = seq + 1; - -- l = 0; -- while (seq < dumper->next_seq) { -- struct printk_log *msg = log_from_idx(idx); -+ /* copy messages to buffer */ -+ while (l < size) { -+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq); -+ if (ret == 0) { -+ break; -+ } else if (ret < 0) { -+ /* -+ * iterator (and thus also the start position) -+ * invalid, start over from beginning of list -+ */ -+ prb_iter_init(&iter, &printk_rb, NULL); -+ continue; -+ } - -- l += msg_print_text(msg, syslog, time, buf + l, size - l); -- idx = log_next(idx); -- seq++; -+ if (seq >= end_seq) -+ break; -+ -+ msg = (struct printk_log *)msgbuf; -+ textlen = msg_print_text(msg, syslog, time, buf + l, size - l); -+ if (textlen > 0) -+ l += textlen; -+ cont = true; - } - -- dumper->next_seq = next_seq; -- dumper->next_idx = next_idx; -- ret = true; -- logbuf_unlock_irqrestore(flags); --out: -- if (len) -+ if (cont && len) - *len = l; -- return ret; -+out: -+ prb_commit(&h); -+ if (new_end_seq) -+ dumper->buffer_end_seq = new_end_seq; -+ return cont; - } - EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - -@@ -3248,10 +3268,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - */ - void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) - { -- dumper->cur_seq = clear_seq; -- dumper->cur_idx = clear_idx; -- dumper->next_seq = log_next_seq; -- dumper->next_idx = log_next_idx; -+ dumper->line_seq = 0; -+ dumper->buffer_end_seq = 0; - } - - /** -@@ -3264,11 +3282,9 @@ void kmsg_dump_rewind_nolock(struct kmsg - */ - void kmsg_dump_rewind(struct kmsg_dumper *dumper) - { -- unsigned long flags; -- -- logbuf_lock_irqsave(flags); -+ mutex_lock(&kmsg_dump_lock); - kmsg_dump_rewind_nolock(dumper); -- logbuf_unlock_irqrestore(flags); -+ mutex_unlock(&kmsg_dump_lock); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - diff --git a/kernel/patches-5.4.x-rt/0042-0025-printk-remove-unused-code.patch b/kernel/patches-5.4.x-rt/0042-0025-printk-remove-unused-code.patch deleted file mode 100644 index 31dba1a77..000000000 --- a/kernel/patches-5.4.x-rt/0042-0025-printk-remove-unused-code.patch +++ /dev/null @@ -1,346 +0,0 @@ -From: John Ogness -Date: Tue, 12 Feb 2019 15:30:03 +0100 -Subject: [PATCH 25/25] printk: remove unused code - -Code relating to the safe context and anything dealing with the -previous log buffer implementation is no longer in use. Remove it. 
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/internal.h | 41 ----------- - kernel/printk/printk.c | 161 ++++------------------------------------------- - lib/bust_spinlocks.c | 3 - 3 files changed, 16 insertions(+), 189 deletions(-) - delete mode 100644 kernel/printk/internal.h - ---- a/kernel/printk/internal.h -+++ /dev/null -@@ -1,41 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0-or-later */ --/* -- * internal.h - printk internal definitions -- */ --#include -- --#ifdef CONFIG_PRINTK -- --#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff --#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000 --#define PRINTK_NMI_CONTEXT_MASK 0x80000000 -- --extern raw_spinlock_t logbuf_lock; -- --__printf(5, 0) --int vprintk_store(int facility, int level, -- const char *dict, size_t dictlen, -- const char *fmt, va_list args); -- --__printf(1, 0) int vprintk_default(const char *fmt, va_list args); --__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); --__printf(1, 0) int vprintk_func(const char *fmt, va_list args); -- --void defer_console_output(void); -- --#else -- --__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } -- --/* -- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem -- * semaphore and some of console functions (console_unlock()/etc.), so -- * printk-safe must preserve the existing local IRQ guarantees. -- */ --#endif /* CONFIG_PRINTK */ -- --#define printk_safe_enter_irqsave(flags) local_irq_save(flags) --#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) -- --#define printk_safe_enter_irq() local_irq_disable() --#define printk_safe_exit_irq() local_irq_enable() ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -61,7 +61,6 @@ - - #include "console_cmdline.h" - #include "braille.h" --#include "internal.h" - - int console_printk[5] = { - CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ -@@ -366,41 +365,6 @@ struct printk_log { - #endif - ; - --/* -- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken -- * within the scheduler's rq lock. It must be released before calling -- * console_unlock() or anything else that might wake up a process. -- */ --DEFINE_RAW_SPINLOCK(logbuf_lock); -- --/* -- * Helper macros to lock/unlock logbuf_lock and switch between -- * printk-safe/unsafe modes. 
-- */ --#define logbuf_lock_irq() \ -- do { \ -- printk_safe_enter_irq(); \ -- raw_spin_lock(&logbuf_lock); \ -- } while (0) -- --#define logbuf_unlock_irq() \ -- do { \ -- raw_spin_unlock(&logbuf_lock); \ -- printk_safe_exit_irq(); \ -- } while (0) -- --#define logbuf_lock_irqsave(flags) \ -- do { \ -- printk_safe_enter_irqsave(flags); \ -- raw_spin_lock(&logbuf_lock); \ -- } while (0) -- --#define logbuf_unlock_irqrestore(flags) \ -- do { \ -- raw_spin_unlock(&logbuf_lock); \ -- printk_safe_exit_irqrestore(flags); \ -- } while (0) -- - DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock); - - #ifdef CONFIG_PRINTK -@@ -410,23 +374,15 @@ DECLARE_STATIC_PRINTKRB(printk_rb, CONFI - static DEFINE_MUTEX(syslog_lock); - DECLARE_STATIC_PRINTKRB_ITER(syslog_iter, &printk_rb); - --DECLARE_WAIT_QUEUE_HEAD(log_wait); --/* the next printk record to read by syslog(READ) or /proc/kmsg */ -+/* the last printk record to read by syslog(READ) or /proc/kmsg */ - static u64 syslog_seq; - static size_t syslog_partial; - static bool syslog_time; - --/* index and sequence number of the first record stored in the buffer */ --static u32 log_first_idx; -- --/* index and sequence number of the next record to store in the buffer */ --static u32 log_next_idx; -- - static DEFINE_MUTEX(kmsg_dump_lock); - - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; --static u32 clear_idx; - - #ifdef CONFIG_PRINTK_CALLER - #define PREFIX_MAX 48 -@@ -438,24 +394,16 @@ static u32 clear_idx; - #define LOG_LEVEL(v) ((v) & 0x07) - #define LOG_FACILITY(v) ((v) >> 3 & 0xff) - --/* record buffer */ --#define LOG_ALIGN __alignof__(struct printk_log) --#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) --#define LOG_BUF_LEN_MAX (u32)(1 << 31) --static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); --static char *log_buf = __log_buf; --static u32 log_buf_len = __LOG_BUF_LEN; -- - /* Return log buffer address */ - char *log_buf_addr_get(void) - { -- return log_buf; -+ return printk_rb.buffer; - } - - /* Return log buffer size */ - u32 log_buf_len_get(void) - { -- return log_buf_len; -+ return (1 << printk_rb.size_bits); - } - - /* human readable text of the record */ -@@ -980,11 +928,6 @@ const struct file_operations kmsg_fops = - */ - void log_buf_vmcoreinfo_setup(void) - { -- VMCOREINFO_SYMBOL(log_buf); -- VMCOREINFO_SYMBOL(log_buf_len); -- VMCOREINFO_SYMBOL(log_first_idx); -- VMCOREINFO_SYMBOL(clear_idx); -- VMCOREINFO_SYMBOL(log_next_idx); - /* - * Export struct printk_log size and field offsets. User space tools can - * parse it and detect any changes to structure down the line. 
-@@ -1000,6 +943,8 @@ void log_buf_vmcoreinfo_setup(void) - } - #endif - -+/* FIXME: no support for buffer resizing */ -+#if 0 - /* requested log_buf_len from kernel cmdline */ - static unsigned long __initdata new_log_buf_len; - -@@ -1065,9 +1010,12 @@ static void __init log_buf_add_cpu(void) - #else /* !CONFIG_SMP */ - static inline void log_buf_add_cpu(void) {} - #endif /* CONFIG_SMP */ -+#endif /* 0 */ - - void __init setup_log_buf(int early) - { -+/* FIXME: no support for buffer resizing */ -+#if 0 - unsigned long flags; - char *new_log_buf; - unsigned int free; -@@ -1099,6 +1047,7 @@ void __init setup_log_buf(int early) - pr_info("log_buf_len: %u bytes\n", log_buf_len); - pr_info("early log buf free: %u(%u%%)\n", - free, (free * 100) / __LOG_BUF_LEN); -+#endif - } - - static bool __read_mostly ignore_loglevel; -@@ -2019,7 +1968,7 @@ asmlinkage int vprintk_emit(int facility - } - EXPORT_SYMBOL(vprintk_emit); - --__printf(1, 0) int vprintk_func(const char *fmt, va_list args) -+static __printf(1, 0) int vprintk_func(const char *fmt, va_list args) - { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); - } -@@ -2080,31 +2029,6 @@ asmlinkage __visible int printk(const ch - return r; - } - EXPORT_SYMBOL(printk); -- --#else /* CONFIG_PRINTK */ -- --#define LOG_LINE_MAX 0 --#define PREFIX_MAX 0 --#define printk_time false -- --static u64 syslog_seq; --static u32 log_first_idx; --static char *log_text(const struct printk_log *msg) { return NULL; } --static char *log_dict(const struct printk_log *msg) { return NULL; } --static struct printk_log *log_from_idx(u32 idx) { return NULL; } --static u32 log_next(u32 idx) { return 0; } --static ssize_t msg_print_ext_header(char *buf, size_t size, -- struct printk_log *msg, -- u64 seq) { return 0; } --static ssize_t msg_print_ext_body(char *buf, size_t size, -- char *dict, size_t dict_len, -- char *text, size_t text_len) { return 0; } --static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, -- const char *text, size_t len, int level) {} --static size_t msg_print_text(const struct printk_log *msg, bool syslog, -- bool time, char *buf, size_t size) { return 0; } --static bool suppress_message_printing(int level) { return false; } -- - #endif /* CONFIG_PRINTK */ - - #ifdef CONFIG_EARLY_PRINTK -@@ -2401,15 +2325,10 @@ void console_unblank(void) - void console_flush_on_panic(enum con_flush_mode mode) - { - /* -- * If someone else is holding the console lock, trylock will fail -- * and may_schedule may be set. Ignore and proceed to unlock so -- * that messages are flushed out. As this can be called from any -- * context and we don't want to get preempted while flushing, -- * ensure may_schedule is cleared. -+ * FIXME: This is currently a NOP. Emergency messages will have been -+ * printed, but what about if write_atomic is not available on the -+ * console? What if the printk kthread is still alive? 
- */ -- console_trylock(); -- console_may_schedule = 0; -- console_unlock(); - } - - /* -@@ -2758,43 +2677,6 @@ static int __init printk_late_init(void) - late_initcall(printk_late_init); - - #if defined CONFIG_PRINTK --/* -- * Delayed printk version, for scheduler-internal messages: -- */ --#define PRINTK_PENDING_WAKEUP 0x01 --#define PRINTK_PENDING_OUTPUT 0x02 -- --static DEFINE_PER_CPU(int, printk_pending); -- --static void wake_up_klogd_work_func(struct irq_work *irq_work) --{ -- int pending = __this_cpu_xchg(printk_pending, 0); -- -- if (pending & PRINTK_PENDING_OUTPUT) { -- /* If trylock fails, someone else is doing the printing */ -- if (console_trylock()) -- console_unlock(); -- } -- -- if (pending & PRINTK_PENDING_WAKEUP) -- wake_up_interruptible(&log_wait); --} -- --static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { -- .func = wake_up_klogd_work_func, -- .flags = IRQ_WORK_LAZY, --}; -- --void wake_up_klogd(void) --{ -- preempt_disable(); -- if (waitqueue_active(&log_wait)) { -- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); -- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); -- } -- preempt_enable(); --} -- - static int printk_kthread_func(void *data) - { - struct prb_iterator iter; -@@ -2860,22 +2742,9 @@ static int __init init_printk_kthread(vo - } - late_initcall(init_printk_kthread); - --void defer_console_output(void) --{ -- preempt_disable(); -- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); -- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); -- preempt_enable(); --} -- --int vprintk_deferred(const char *fmt, va_list args) -+static int vprintk_deferred(const char *fmt, va_list args) - { -- int r; -- -- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); -- defer_console_output(); -- -- return r; -+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); - } - - int printk_deferred(const char *fmt, ...) ---- a/lib/bust_spinlocks.c -+++ b/lib/bust_spinlocks.c -@@ -26,7 +26,6 @@ void bust_spinlocks(int yes) - unblank_screen(); - #endif - console_unblank(); -- if (--oops_in_progress == 0) -- wake_up_klogd(); -+ --oops_in_progress; - } - } diff --git a/kernel/patches-5.4.x-rt/0043-printk-set-deferred-to-default-loglevel-enforce-mask.patch b/kernel/patches-5.4.x-rt/0043-printk-set-deferred-to-default-loglevel-enforce-mask.patch deleted file mode 100644 index 5942464ff..000000000 --- a/kernel/patches-5.4.x-rt/0043-printk-set-deferred-to-default-loglevel-enforce-mask.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: John Ogness -Date: Thu, 14 Feb 2019 23:13:30 +0100 -Subject: [PATCH] printk: set deferred to default loglevel, enforce mask - -All messages printed via vpritnk_deferred() were being -automatically treated as emergency messages. - -Messages printed via vprintk_deferred() should be set to the -default loglevel. LOGLEVEL_SCHED is no longer relevant. - -Also, enforce the loglevel mask for emergency messages. 
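To make the masking concrete (toy example, not part of this patch): a
printk level value carries the syslog facility in the high bits and the
severity in the low three bits, so "level & 7" strips the facility
exactly as LOG_LEVEL() does:

	#include <stdio.h>

	#define LOG_LEVEL(v)	((v) & 0x07)
	#define LOG_FACILITY(v)	((v) >> 3 & 0xff)

	int main(void)
	{
		int v = (1 << 3) | 5;	/* facility 1 (LOG_USER), level 5 */

		printf("level=%d facility=%d\n", LOG_LEVEL(v), LOG_FACILITY(v));
		return 0;	/* prints "level=5 facility=1" */
	}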
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1951,7 +1951,7 @@ asmlinkage int vprintk_emit(int facility - * - text points to beginning of text - * - there is room before text for prefix - */ -- printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len); -+ printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len); - - if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) { - cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len); -@@ -2744,7 +2744,7 @@ late_initcall(init_printk_kthread); - - static int vprintk_deferred(const char *fmt, va_list args) - { -- return vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); -+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); - } - - int printk_deferred(const char *fmt, ...) diff --git a/kernel/patches-5.4.x-rt/0044-serial-8250-remove-that-trylock-in-serial8250_consol.patch b/kernel/patches-5.4.x-rt/0044-serial-8250-remove-that-trylock-in-serial8250_consol.patch deleted file mode 100644 index e84ce5a21..000000000 --- a/kernel/patches-5.4.x-rt/0044-serial-8250-remove-that-trylock-in-serial8250_consol.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 14 Feb 2019 17:38:24 +0100 -Subject: [PATCH] serial: 8250: remove that trylock in - serial8250_console_write_atomic() - -This does not work as rtmutex in NMI context. As per John, it is not -needed. - -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/tty/serial/8250/8250_port.c | 11 ----------- - 1 file changed, 11 deletions(-) - ---- a/drivers/tty/serial/8250/8250_port.c -+++ b/drivers/tty/serial/8250/8250_port.c -@@ -3191,17 +3191,9 @@ void serial8250_console_write_atomic(str - { - struct uart_port *port = &up->port; - unsigned int flags; -- bool locked; - - console_atomic_lock(&flags); - -- /* -- * If possible, keep any other CPUs from working with the -- * UART until the atomic message is completed. This helps -- * to keep the output more orderly. 
-- */ -- locked = spin_trylock(&port->lock); -- - touch_nmi_watchdog(); - - clear_ier(up); -@@ -3216,9 +3208,6 @@ void serial8250_console_write_atomic(str - wait_for_xmitr(up, BOTH_EMPTY); - restore_ier(up); - -- if (locked) -- spin_unlock(&port->lock); -- - console_atomic_unlock(flags); - } - diff --git a/kernel/patches-5.4.x-rt/0045-serial-8250-export-symbols-which-are-used-by-symbols.patch b/kernel/patches-5.4.x-rt/0045-serial-8250-export-symbols-which-are-used-by-symbols.patch deleted file mode 100644 index b82fd3a5b..000000000 --- a/kernel/patches-5.4.x-rt/0045-serial-8250-export-symbols-which-are-used-by-symbols.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Sat, 16 Feb 2019 09:02:00 +0100 -Subject: [PATCH] serial: 8250: export symbols which are used by symbols - -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/tty/serial/8250/8250_port.c | 2 ++ - kernel/printk/printk.c | 1 + - 2 files changed, 3 insertions(+) - ---- a/drivers/tty/serial/8250/8250_port.c -+++ b/drivers/tty/serial/8250/8250_port.c -@@ -2025,6 +2025,7 @@ void clear_ier(struct uart_8250_port *up - } - console_atomic_unlock(flags); - } -+EXPORT_SYMBOL_GPL(clear_ier); - - void restore_ier(struct uart_8250_port *up) - { -@@ -2036,6 +2037,7 @@ void restore_ier(struct uart_8250_port * - serial_port_out(port, UART_IER, atomic_read(&ier_value)); - console_atomic_unlock(flags); - } -+EXPORT_SYMBOL_GPL(restore_ier); - - #ifdef CONFIG_CONSOLE_POLL - /* ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2257,6 +2257,7 @@ int is_console_locked(void) - { - return console_locked; - } -+EXPORT_SYMBOL(is_console_locked); - - /** - * console_unlock - unlock the console system diff --git a/kernel/patches-5.4.x-rt/0046-arm-remove-printk_nmi_.patch b/kernel/patches-5.4.x-rt/0046-arm-remove-printk_nmi_.patch deleted file mode 100644 index 616948ff1..000000000 --- a/kernel/patches-5.4.x-rt/0046-arm-remove-printk_nmi_.patch +++ /dev/null @@ -1,25 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 15 Feb 2019 14:34:20 +0100 -Subject: [PATCH] arm: remove printk_nmi_.*() - -It is no longer provided by the printk core code. - -Signed-off-by: Sebastian Andrzej Siewior ---- - arch/arm/kernel/smp.c | 2 -- - 1 file changed, 2 deletions(-) - ---- a/arch/arm/kernel/smp.c -+++ b/arch/arm/kernel/smp.c -@@ -682,11 +682,9 @@ void handle_IPI(int ipinr, struct pt_reg - break; - - case IPI_CPU_BACKTRACE: -- printk_nmi_enter(); - irq_enter(); - nmi_cpu_backtrace(regs); - irq_exit(); -- printk_nmi_exit(); - break; - - default: diff --git a/kernel/patches-5.4.x-rt/0047-printk-only-allow-kernel-to-emergency-message.patch b/kernel/patches-5.4.x-rt/0047-printk-only-allow-kernel-to-emergency-message.patch deleted file mode 100644 index 396335cd4..000000000 --- a/kernel/patches-5.4.x-rt/0047-printk-only-allow-kernel-to-emergency-message.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: John Ogness -Date: Sun, 17 Feb 2019 03:11:20 +0100 -Subject: [PATCH] printk: only allow kernel to emergency message - -Emergency messages exist as a mechanism for the kernel to -communicate critical information to users. It is not meant for -use by userspace. Only allow facility=0 messages to be -processed by the emergency message code. 
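For illustration (not part of this patch): user space injecting through
/dev/kmsg writes records such as "<13>msg", where the syslog prefix
encodes a non-zero facility (here facility 1, LOG_USER, level 5), so the
facility == 0 test reliably separates kernel printk() output from
injected messages:

	#include <stdio.h>

	int main(void)
	{
		unsigned int prefix = 13;	/* from "<13>msg" on /dev/kmsg */

		/* facility 1 != 0, so printk_emergency() is never invoked */
		printf("facility=%u level=%u\n", prefix >> 3, prefix & 7);
		return 0;
	}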
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 17 +++++++++++------ - 1 file changed, 11 insertions(+), 6 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1754,7 +1754,8 @@ static void printk_write_history(struct - * The console_lock must be held. - */ - static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len, -- const char *text, size_t len, int level) -+ const char *text, size_t len, int level, -+ int facility) - { - struct console *con; - -@@ -1774,13 +1775,14 @@ static void call_console_drivers(u64 seq - con->wrote_history = 1; - con->printk_seq = seq - 1; - } -- if (con->write_atomic && level < emergency_console_loglevel) { -+ if (con->write_atomic && level < emergency_console_loglevel && -+ facility == 0) { - /* skip emergency messages, already printed */ - if (con->printk_seq < seq) - con->printk_seq = seq; - continue; - } -- if (con->flags & CON_BOOT) { -+ if (con->flags & CON_BOOT && facility == 0) { - /* skip emergency messages, already printed */ - if (con->printk_seq < seq) - con->printk_seq = seq; -@@ -1951,7 +1953,10 @@ asmlinkage int vprintk_emit(int facility - * - text points to beginning of text - * - there is room before text for prefix - */ -- printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len); -+ if (facility == 0) { -+ /* only the kernel can create emergency messages */ -+ printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len); -+ } - - if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) { - cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len); -@@ -2715,8 +2720,8 @@ static int printk_kthread_func(void *dat - &len, printk_time); - - console_lock(); -- call_console_drivers(master_seq, ext_text, -- ext_len, text, len, msg->level); -+ call_console_drivers(master_seq, ext_text, ext_len, text, len, -+ msg->level, msg->facility); - if (len > 0 || ext_len > 0) - printk_delay(msg->level); - console_unlock(); diff --git a/kernel/patches-5.4.x-rt/0048-printk-devkmsg-llseek-reset-clear-if-it-is-lost.patch b/kernel/patches-5.4.x-rt/0048-printk-devkmsg-llseek-reset-clear-if-it-is-lost.patch deleted file mode 100644 index 6bb5e4506..000000000 --- a/kernel/patches-5.4.x-rt/0048-printk-devkmsg-llseek-reset-clear-if-it-is-lost.patch +++ /dev/null @@ -1,45 +0,0 @@ -From: John Ogness -Date: Fri, 22 Feb 2019 23:02:44 +0100 -Subject: [PATCH] printk: devkmsg: llseek: reset clear if it is lost - -SEEK_DATA will seek to the last clear record. If this clear record -is no longer in the ring buffer, devkmsg_llseek() will go into an -infinite loop. Fix that by resetting the clear sequence if the old -clear record is no longer in the ring buffer. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -761,6 +761,7 @@ static loff_t devkmsg_llseek(struct file - { - struct devkmsg_user *user = file->private_data; - loff_t ret; -+ u64 seq; - - if (!user) - return -EBADF; -@@ -783,7 +784,7 @@ static loff_t devkmsg_llseek(struct file - * changes no global state, and does not clear anything. 
- */ - for (;;) { -- prb_iter_init(&user->iter, &printk_rb, NULL); -+ prb_iter_init(&user->iter, &printk_rb, &seq); - ret = prb_iter_seek(&user->iter, clear_seq); - if (ret > 0) { - /* seeked to clear seq */ -@@ -800,6 +801,10 @@ static loff_t devkmsg_llseek(struct file - break; - } - /* iterator invalid, start over */ -+ -+ /* reset clear_seq if it is no longer available */ -+ if (seq > clear_seq) -+ clear_seq = 0; - } - ret = 0; - break; diff --git a/kernel/patches-5.4.x-rt/0049-printk-print-rate-limitted-message-as-info.patch b/kernel/patches-5.4.x-rt/0049-printk-print-rate-limitted-message-as-info.patch deleted file mode 100644 index 24a0a284e..000000000 --- a/kernel/patches-5.4.x-rt/0049-printk-print-rate-limitted-message-as-info.patch +++ /dev/null @@ -1,24 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 22 Feb 2019 12:47:13 +0100 -Subject: [PATCH] printk: print "rate-limitted" message as info - -If messages which are injected via kmsg are dropped then they don't need -to be printed as warnings. This is to avoid latency spikes if the -interface decides to print a lot of important messages. - -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/ratelimit.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/linux/ratelimit.h -+++ b/include/linux/ratelimit.h -@@ -59,7 +59,7 @@ static inline void ratelimit_state_exit( - return; - - if (rs->missed) { -- pr_warn("%s: %d output lines suppressed due to ratelimiting\n", -+ pr_info("%s: %d output lines suppressed due to ratelimiting\n", - current->comm, rs->missed); - rs->missed = 0; - } diff --git a/kernel/patches-5.4.x-rt/0050-printk-kmsg_dump-remove-mutex-usage.patch b/kernel/patches-5.4.x-rt/0050-printk-kmsg_dump-remove-mutex-usage.patch deleted file mode 100644 index 6fa6ca680..000000000 --- a/kernel/patches-5.4.x-rt/0050-printk-kmsg_dump-remove-mutex-usage.patch +++ /dev/null @@ -1,84 +0,0 @@ -From: John Ogness -Date: Wed, 24 Apr 2019 16:36:04 +0200 -Subject: [PATCH] printk: kmsg_dump: remove mutex usage - -The kmsg dumper can be called from any context, but the dumping -helpers were using a mutex to synchronize the iterator against -concurrent dumps. - -Rather than trying to synchronize the iterator, use a local copy -of the iterator during the dump. Then no synchronization is -required. 
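The pattern, reduced to a user-space toy (struct layout and names are
invented for illustration): snapshot the shared iterator into a stack
copy so concurrent dumpers never disturb each other's cursor and no lock
is needed:

	#include <stdio.h>
	#include <string.h>

	struct dump_iter {			/* hypothetical stand-in */
		unsigned long long line_seq;
		int active;
	};

	static struct dump_iter registered;	/* shared, read-only here */

	static void dump(void)
	{
		struct dump_iter local;

		/* all iteration below uses only the private copy */
		memcpy(&local, &registered, sizeof(local));
		local.active = 1;
		printf("dump starts at seq %llu\n", local.line_seq);
	}

	int main(void)
	{
		dump();
		return 0;
	}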
- -Reported-by: Scott Wood -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 23 ++++++++++------------- - 1 file changed, 10 insertions(+), 13 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -379,8 +379,6 @@ static u64 syslog_seq; - static size_t syslog_partial; - static bool syslog_time; - --static DEFINE_MUTEX(kmsg_dump_lock); -- - /* the next printk record to read after the last 'clear' command */ - static u64 clear_seq; - -@@ -2877,6 +2875,7 @@ module_param_named(always_kmsg_dump, alw - */ - void kmsg_dump(enum kmsg_dump_reason reason) - { -+ struct kmsg_dumper dumper_local; - struct kmsg_dumper *dumper; - - if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) -@@ -2887,16 +2886,18 @@ void kmsg_dump(enum kmsg_dump_reason rea - if (dumper->max_reason && reason > dumper->max_reason) - continue; - -- /* initialize iterator with data about the stored records */ -- dumper->active = true; -+ /* -+ * use a local copy to avoid modifying the -+ * iterator used by any other cpus/contexts -+ */ -+ memcpy(&dumper_local, dumper, sizeof(dumper_local)); - -- kmsg_dump_rewind(dumper); -+ /* initialize iterator with data about the stored records */ -+ dumper_local.active = true; -+ kmsg_dump_rewind(&dumper_local); - - /* invoke dumper which will iterate over records */ -- dumper->dump(dumper, reason); -- -- /* reset iterator */ -- dumper->active = false; -+ dumper_local.dump(&dumper_local, reason); - } - rcu_read_unlock(); - } -@@ -3008,9 +3009,7 @@ bool kmsg_dump_get_line(struct kmsg_dump - { - bool ret; - -- mutex_lock(&kmsg_dump_lock); - ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); -- mutex_unlock(&kmsg_dump_lock); - - return ret; - } -@@ -3162,9 +3161,7 @@ void kmsg_dump_rewind_nolock(struct kmsg - */ - void kmsg_dump_rewind(struct kmsg_dumper *dumper) - { -- mutex_lock(&kmsg_dump_lock); - kmsg_dump_rewind_nolock(dumper); -- mutex_unlock(&kmsg_dump_lock); - } - EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - diff --git a/kernel/patches-5.4.x-rt/0051-printk-devkmsg-read-Return-EPIPE-when-the-first-mess.patch b/kernel/patches-5.4.x-rt/0051-printk-devkmsg-read-Return-EPIPE-when-the-first-mess.patch deleted file mode 100644 index 0e283165c..000000000 --- a/kernel/patches-5.4.x-rt/0051-printk-devkmsg-read-Return-EPIPE-when-the-first-mess.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: He Zhe -Date: Tue, 24 Sep 2019 15:26:39 +0800 -Subject: [PATCH] printk: devkmsg: read: Return EPIPE when the first - message user-space wants has gone - -When user-space wants to read the first message, that is when user->seq -is 0, and that message has gone, it currently automatically resets -user->seq to current first seq. This mis-aligns with mainline kernel. - -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/dev-kmsg#n39 -https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/printk/printk.c#n899 - -We should inform user-space that what it wants has gone by returning EPIPE -in such scenario. 
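The intended semantics, as a small sketch (function and variable names are
illustrative): first_seq is the oldest record still in the buffer, user_seq the
next record the reader expects; a reader that has fallen behind is told about
the loss instead of being silently resynchronized.

    #include <errno.h>

    static int next_record(unsigned long *user_seq, unsigned long first_seq)
    {
        (*user_seq)++;
        if (*user_seq < first_seq)
            return -EPIPE;   /* wanted records were overwritten and lost */
        return 0;
    }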
- -Link: https://lore.kernel.org/r/20190924072639.25986-1-zhe.he@windriver.com -Signed-off-by: He Zhe -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 12 ++++-------- - 1 file changed, 4 insertions(+), 8 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -713,14 +713,10 @@ static ssize_t devkmsg_read(struct file - goto out; - } - -- if (user->seq == 0) { -- user->seq = seq; -- } else { -- user->seq++; -- if (user->seq < seq) { -- ret = -EPIPE; -- goto restore_out; -- } -+ user->seq++; -+ if (user->seq < seq) { -+ ret = -EPIPE; -+ goto restore_out; - } - - msg = (struct printk_log *)&user->msgbuf[0]; diff --git a/kernel/patches-5.4.x-rt/0052-printk-handle-iterating-while-buffer-changing.patch b/kernel/patches-5.4.x-rt/0052-printk-handle-iterating-while-buffer-changing.patch deleted file mode 100644 index 1ce4ccb6b..000000000 --- a/kernel/patches-5.4.x-rt/0052-printk-handle-iterating-while-buffer-changing.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: John Ogness -Date: Mon, 7 Oct 2019 16:20:39 +0200 -Subject: [PATCH] printk: handle iterating while buffer changing - -The syslog and kmsg_dump readers are provided buffers to fill. -Both try to maximize the provided buffer usage by calculating the -maximum number of messages that can fit. However, if after the -calculation, messages are dropped and new messages added, the -calculation will no longer match. - -For syslog, add a check to make sure the provided buffer is not -overfilled. - -For kmsg_dump, start over by recalculating the messages -available. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1446,6 +1446,9 @@ static int syslog_print_all(char __user - break; - } - -+ if (len + textlen > size) -+ break; -+ - if (copy_to_user(buf + len, text, textlen)) - len = -EFAULT; - else -@@ -3085,7 +3088,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du - ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq); - if (ret == 0) { - break; -- } else if (ret < 0) { -+ } else if (ret < 0 || seq >= end_seq) { - prb_iter_init(&iter, &printk_rb, &seq); - goto retry; - } diff --git a/kernel/patches-5.4.x-rt/0053-printk-hack-out-emergency-loglevel-usage.patch b/kernel/patches-5.4.x-rt/0053-printk-hack-out-emergency-loglevel-usage.patch deleted file mode 100644 index 71cd11d5d..000000000 --- a/kernel/patches-5.4.x-rt/0053-printk-hack-out-emergency-loglevel-usage.patch +++ /dev/null @@ -1,52 +0,0 @@ -From: John Ogness -Date: Tue, 3 Dec 2019 09:14:57 +0100 -Subject: [PATCH] printk: hack out emergency loglevel usage - -Instead of using an emergency loglevel to determine if atomic -messages should be printed, use oops_in_progress. This conforms -to the decision that latency-causing atomic messages never be -generated during normal operation. 
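In other words, the gate becomes a property of the system state rather than of
the message. A sketch of the resulting predicate (types simplified for
illustration):

    struct con {
        int has_write_atomic;      /* console provides an atomic writer */
    };

    static int oops_in_progress;   /* models the kernel-wide flag */

    /* Latency-prone atomic output is allowed only while an oops is being
     * reported, never merely because a message has a high loglevel. */
    static int can_use_atomic_console(const struct con *c)
    {
        return c->has_write_atomic && oops_in_progress;
    }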
- -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/printk/printk.c | 13 +++---------- - 1 file changed, 3 insertions(+), 10 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -1777,15 +1777,8 @@ static void call_console_drivers(u64 seq - con->wrote_history = 1; - con->printk_seq = seq - 1; - } -- if (con->write_atomic && level < emergency_console_loglevel && -- facility == 0) { -- /* skip emergency messages, already printed */ -- if (con->printk_seq < seq) -- con->printk_seq = seq; -- continue; -- } - if (con->flags & CON_BOOT && facility == 0) { -- /* skip emergency messages, already printed */ -+ /* skip boot messages, already printed */ - if (con->printk_seq < seq) - con->printk_seq = seq; - continue; -@@ -3171,7 +3164,7 @@ static bool console_can_emergency(int le - for_each_console(con) { - if (!(con->flags & CON_ENABLED)) - continue; -- if (con->write_atomic && level < emergency_console_loglevel) -+ if (con->write_atomic && oops_in_progress) - return true; - if (con->write && (con->flags & CON_BOOT)) - return true; -@@ -3187,7 +3180,7 @@ static void call_emergency_console_drive - for_each_console(con) { - if (!(con->flags & CON_ENABLED)) - continue; -- if (con->write_atomic && level < emergency_console_loglevel) { -+ if (con->write_atomic && oops_in_progress) { - con->write_atomic(con, text, text_len); - continue; - } diff --git a/kernel/patches-5.4.x-rt/0054-serial-8250-only-atomic-lock-for-console.patch b/kernel/patches-5.4.x-rt/0054-serial-8250-only-atomic-lock-for-console.patch deleted file mode 100644 index 9abc5c608..000000000 --- a/kernel/patches-5.4.x-rt/0054-serial-8250-only-atomic-lock-for-console.patch +++ /dev/null @@ -1,384 +0,0 @@ -From: John Ogness -Date: Fri, 10 Jan 2020 16:45:31 +0106 -Subject: [PATCH] serial: 8250: only atomic lock for console - -The atomic console implementation requires that IER is synchronized -between atomic and non-atomic usage. However, it was implemented such -that the console_atomic_lock was performed for all IER access, even -if that port was not a console. - -The implementation also used a usage counter to keep track of IER -clear/restore windows. However, this is not needed because the -console_atomic_lock synchronization of IER access with prevent any -situations where IER is prematurely restored or left cleared. - -Move the IER access functions to inline macros. They will only -console_atomic_lock if the port is a console. Remove the -restore_ier() function by having clear_ier() return the prior IER -value so that the caller can restore it using set_ier(). Rename the -IER access functions to match other 8250 wrapper macros. 
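The resulting save/restore shape, modeled in plain C (a sketch assuming a
single interrupt-mask register; the real helpers additionally take the
console_atomic_lock when the port is a console):

    static unsigned char hw_ier;          /* stands in for UART_IER */

    static unsigned char clear_ier(void)
    {
        unsigned char prior = hw_ier;

        hw_ier = 0;                       /* mask all interrupts */
        return prior;                     /* caller keeps the old value */
    }

    static void set_ier(unsigned char ier)
    {
        hw_ier = ier;
    }

    /* usage: unsigned char ier = clear_ier(); ...polled I/O...; set_ier(ier); */

Returning the prior value makes every clear/restore window explicit in the
caller, which is why the usage counter can go away.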
- -Suggested-by: Dick Hollenbeck -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/tty/serial/8250/8250.h | 65 +++++++++++++++++++--------- - drivers/tty/serial/8250/8250_core.c | 6 +- - drivers/tty/serial/8250/8250_dma.c | 4 - - drivers/tty/serial/8250/8250_port.c | 81 ++++++++---------------------------- - 4 files changed, 66 insertions(+), 90 deletions(-) - ---- a/drivers/tty/serial/8250/8250.h -+++ b/drivers/tty/serial/8250/8250.h -@@ -96,10 +96,6 @@ struct serial8250_config { - #define SERIAL8250_SHARE_IRQS 0 - #endif - --void set_ier(struct uart_8250_port *up, unsigned char ier); --void clear_ier(struct uart_8250_port *up); --void restore_ier(struct uart_8250_port *up); -- - #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ - { \ - .iobase = _base, \ -@@ -134,39 +130,64 @@ static inline void serial_dl_write(struc - up->dl_write(up, value); - } - --static inline bool serial8250_set_THRI(struct uart_8250_port *up) -+static inline void serial8250_set_IER(struct uart_8250_port *up, -+ unsigned char ier) - { -- if (up->ier & UART_IER_THRI) -- return false; -- up->ier |= UART_IER_THRI; -- serial_out(up, UART_IER, up->ier); -- return true; -+ struct uart_port *port = &up->port; -+ unsigned int flags; -+ bool is_console; -+ -+ is_console = uart_console(port); -+ -+ if (is_console) -+ console_atomic_lock(&flags); -+ -+ serial_out(up, UART_IER, ier); -+ -+ if (is_console) -+ console_atomic_unlock(flags); - } - --static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up) -+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up) - { -- if (up->ier & UART_IER_THRI) -- return false; -- up->ier |= UART_IER_THRI; -- set_ier(up, up->ier); -- return true; -+ struct uart_port *port = &up->port; -+ unsigned int clearval = 0; -+ unsigned int prior; -+ unsigned int flags; -+ bool is_console; -+ -+ is_console = uart_console(port); -+ -+ if (up->capabilities & UART_CAP_UUE) -+ clearval = UART_IER_UUE; -+ -+ if (is_console) -+ console_atomic_lock(&flags); -+ -+ prior = serial_port_in(port, UART_IER); -+ serial_port_out(port, UART_IER, clearval); -+ -+ if (is_console) -+ console_atomic_unlock(flags); -+ -+ return prior; - } - --static inline bool serial8250_clear_THRI(struct uart_8250_port *up) -+static inline bool serial8250_set_THRI(struct uart_8250_port *up) - { -- if (!(up->ier & UART_IER_THRI)) -+ if (up->ier & UART_IER_THRI) - return false; -- up->ier &= ~UART_IER_THRI; -- serial_out(up, UART_IER, up->ier); -+ up->ier |= UART_IER_THRI; -+ serial8250_set_IER(up, up->ier); - return true; - } - --static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up) -+static inline bool serial8250_clear_THRI(struct uart_8250_port *up) - { - if (!(up->ier & UART_IER_THRI)) - return false; - up->ier &= ~UART_IER_THRI; -- set_ier(up, up->ier); -+ serial8250_set_IER(up, up->ier); - return true; - } - ---- a/drivers/tty/serial/8250/8250_core.c -+++ b/drivers/tty/serial/8250/8250_core.c -@@ -265,7 +265,7 @@ static void serial8250_timeout(struct ti - static void serial8250_backup_timeout(struct timer_list *t) - { - struct uart_8250_port *up = from_timer(up, t, timer); -- unsigned int iir, lsr; -+ unsigned int iir, ier = 0, lsr; - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); -@@ -275,7 +275,7 @@ static void serial8250_backup_timeout(st - * based handler. 
- */ - if (up->port.irq) -- clear_ier(up); -+ ier = serial8250_clear_IER(up); - - iir = serial_in(up, UART_IIR); - -@@ -298,7 +298,7 @@ static void serial8250_backup_timeout(st - serial8250_tx_chars(up); - - if (up->port.irq) -- restore_ier(up); -+ serial8250_set_IER(up, ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - ---- a/drivers/tty/serial/8250/8250_dma.c -+++ b/drivers/tty/serial/8250/8250_dma.c -@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para - - ret = serial8250_tx_dma(p); - if (ret) -- serial8250_set_THRI_sier(p); -+ serial8250_set_THRI(p); - - spin_unlock_irqrestore(&p->port.lock, flags); - } -@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p - dma_async_issue_pending(dma->txchan); - if (dma->tx_err) { - dma->tx_err = 0; -- serial8250_clear_THRI_sier(p); -+ serial8250_clear_THRI(p); - } - return 0; - err: ---- a/drivers/tty/serial/8250/8250_port.c -+++ b/drivers/tty/serial/8250/8250_port.c -@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct - serial_out(p, UART_EFR, UART_EFR_ECB); - serial_out(p, UART_LCR, 0); - } -- set_ier(p, sleep ? UART_IERX_SLEEP : 0); -+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); - if (p->capabilities & UART_CAP_EFR) { - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, efr); -@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua - - up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); - up->port.read_status_mask &= ~UART_LSR_DR; -- set_ier(up, up->ier); -+ serial8250_set_IER(up, up->ier); - - serial8250_rpm_put(up); - } -@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua - serial8250_clear_and_reinit_fifos(p); - - p->ier |= UART_IER_RLSI | UART_IER_RDI; -- set_ier(p, p->ier); -+ serial8250_set_IER(p, p->ier); - } - } - static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t) -@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_ - - static inline void __do_stop_tx(struct uart_8250_port *p) - { -- if (serial8250_clear_THRI_sier(p)) -+ if (serial8250_clear_THRI(p)) - serial8250_rpm_put_tx(p); - } - -@@ -1509,7 +1509,7 @@ static inline void __start_tx(struct uar - if (up->dma && !up->dma->tx_dma(up)) - return; - -- if (serial8250_set_THRI_sier(up)) { -+ if (serial8250_set_THRI(up)) { - if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr; - -@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct - mctrl_gpio_disable_ms(up->gpios); - - up->ier &= ~UART_IER_MSI; -- set_ier(up, up->ier); -+ serial8250_set_IER(up, up->ier); - } - - static void serial8250_enable_ms(struct uart_port *port) -@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct - up->ier |= UART_IER_MSI; - - serial8250_rpm_get(up); -- set_ier(up, up->ier); -+ serial8250_set_IER(up, up->ier); - serial8250_rpm_put(up); - } - -@@ -1991,54 +1991,6 @@ static void wait_for_xmitr(struct uart_8 - } - } - --static atomic_t ier_counter = ATOMIC_INIT(0); --static atomic_t ier_value = ATOMIC_INIT(0); -- --void set_ier(struct uart_8250_port *up, unsigned char ier) --{ -- struct uart_port *port = &up->port; -- unsigned int flags; -- -- console_atomic_lock(&flags); -- if (atomic_read(&ier_counter) > 0) -- atomic_set(&ier_value, ier); -- else -- serial_port_out(port, UART_IER, ier); -- console_atomic_unlock(flags); --} -- --void clear_ier(struct uart_8250_port *up) --{ -- struct uart_port *port = &up->port; -- unsigned int ier_cleared = 0; -- unsigned int flags; -- unsigned int ier; -- -- console_atomic_lock(&flags); -- atomic_inc(&ier_counter); -- ier = serial_port_in(port, UART_IER); -- 
if (up->capabilities & UART_CAP_UUE) -- ier_cleared = UART_IER_UUE; -- if (ier != ier_cleared) { -- serial_port_out(port, UART_IER, ier_cleared); -- atomic_set(&ier_value, ier); -- } -- console_atomic_unlock(flags); --} --EXPORT_SYMBOL_GPL(clear_ier); -- --void restore_ier(struct uart_8250_port *up) --{ -- struct uart_port *port = &up->port; -- unsigned int flags; -- -- console_atomic_lock(&flags); -- if (atomic_fetch_dec(&ier_counter) == 1) -- serial_port_out(port, UART_IER, atomic_read(&ier_value)); -- console_atomic_unlock(flags); --} --EXPORT_SYMBOL_GPL(restore_ier); -- - #ifdef CONFIG_CONSOLE_POLL - /* - * Console polling routines for writing and reading from the uart while -@@ -2070,10 +2022,11 @@ static int serial8250_get_poll_char(stru - static void serial8250_put_poll_char(struct uart_port *port, - unsigned char c) - { -+ unsigned int ier; - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_rpm_get(up); -- clear_ier(up); -+ ier = serial8250_clear_IER(up); - - wait_for_xmitr(up, BOTH_EMPTY); - /* -@@ -2086,7 +2039,7 @@ static void serial8250_put_poll_char(str - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); -- restore_ier(up); -+ serial8250_set_IER(up, ier); - serial8250_rpm_put(up); - } - -@@ -2398,7 +2351,7 @@ void serial8250_do_shutdown(struct uart_ - */ - spin_lock_irqsave(&port->lock, flags); - up->ier = 0; -- set_ier(up, 0); -+ serial8250_set_IER(up, 0); - spin_unlock_irqrestore(&port->lock, flags); - - synchronize_irq(port->irq); -@@ -2683,7 +2636,7 @@ serial8250_do_set_termios(struct uart_po - if (up->capabilities & UART_CAP_RTOIE) - up->ier |= UART_IER_RTOIE; - -- set_ier(up, up->ier); -+ serial8250_set_IER(up, up->ier); - - if (up->capabilities & UART_CAP_EFR) { - unsigned char efr = 0; -@@ -3193,12 +3146,13 @@ void serial8250_console_write_atomic(str - { - struct uart_port *port = &up->port; - unsigned int flags; -+ unsigned int ier; - - console_atomic_lock(&flags); - - touch_nmi_watchdog(); - -- clear_ier(up); -+ ier = serial8250_clear_IER(up); - - if (atomic_fetch_inc(&up->console_printing)) { - uart_console_write(port, "\n", 1, -@@ -3208,7 +3162,7 @@ void serial8250_console_write_atomic(str - atomic_dec(&up->console_printing); - - wait_for_xmitr(up, BOTH_EMPTY); -- restore_ier(up); -+ serial8250_set_IER(up, ier); - - console_atomic_unlock(flags); - } -@@ -3224,13 +3178,14 @@ void serial8250_console_write(struct uar - { - struct uart_port *port = &up->port; - unsigned long flags; -+ unsigned int ier; - - touch_nmi_watchdog(); - - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); - -- clear_ier(up); -+ ier = serial8250_clear_IER(up); - - /* check scratch reg to see if port powered off during system sleep */ - if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3247,7 +3202,7 @@ void serial8250_console_write(struct uar - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); -- restore_ier(up); -+ serial8250_set_IER(up, ier); - - /* - * The receive handling will happen properly because the diff --git a/kernel/patches-5.4.x-rt/0055-serial-8250-fsl-ingenic-mtk-fix-atomic-console.patch b/kernel/patches-5.4.x-rt/0055-serial-8250-fsl-ingenic-mtk-fix-atomic-console.patch deleted file mode 100644 index 6db09fe03..000000000 --- a/kernel/patches-5.4.x-rt/0055-serial-8250-fsl-ingenic-mtk-fix-atomic-console.patch +++ /dev/null @@ -1,102 +0,0 @@ -From: John Ogness -Date: Fri, 10 Jan 2020 16:45:32 +0106 -Subject: [PATCH] serial: 8250: fsl/ingenic/mtk: fix atomic console - -A few 8250 implementations have their own 
IER access. If the port -is a console, wrap the accesses with console_atomic_lock. - -Signed-off-by: John Ogness -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/tty/serial/8250/8250_fsl.c | 9 +++++++++ - drivers/tty/serial/8250/8250_ingenic.c | 7 +++++++ - drivers/tty/serial/8250/8250_mtk.c | 29 +++++++++++++++++++++++++++-- - 3 files changed, 43 insertions(+), 2 deletions(-) - ---- a/drivers/tty/serial/8250/8250_fsl.c -+++ b/drivers/tty/serial/8250/8250_fsl.c -@@ -57,9 +57,18 @@ int fsl8250_handle_irq(struct uart_port - - /* Stop processing interrupts on input overrun */ - if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { -+ unsigned int ca_flags; - unsigned long delay; -+ bool is_console; - -+ is_console = uart_console(port); -+ -+ if (is_console) -+ console_atomic_lock(&ca_flags); - up->ier = port->serial_in(port, UART_IER); -+ if (is_console) -+ console_atomic_unlock(ca_flags); -+ - if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { - port->ops->stop_rx(port); - } else { ---- a/drivers/tty/serial/8250/8250_ingenic.c -+++ b/drivers/tty/serial/8250/8250_ingenic.c -@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic - - static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) - { -+ unsigned int flags; -+ bool is_console; - int ier; - - switch (offset) { -@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(stru - * If we have enabled modem status IRQs we should enable - * modem mode. - */ -+ is_console = uart_console(p); -+ if (is_console) -+ console_atomic_lock(&flags); - ier = p->serial_in(p, UART_IER); -+ if (is_console) -+ console_atomic_unlock(flags); - - if (ier & UART_IER_MSI) - value |= UART_MCR_MDCE | UART_MCR_FCM; ---- a/drivers/tty/serial/8250/8250_mtk.c -+++ b/drivers/tty/serial/8250/8250_mtk.c -@@ -212,12 +212,37 @@ static void mtk8250_shutdown(struct uart - - static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) - { -- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); -+ struct uart_port *port = &up->port; -+ unsigned int flags; -+ unsigned int ier; -+ bool is_console; -+ -+ is_console = uart_console(port); -+ -+ if (is_console) -+ console_atomic_lock(&flags); -+ -+ ier = serial_in(up, UART_IER); -+ serial_out(up, UART_IER, ier & (~mask)); -+ -+ if (is_console) -+ console_atomic_unlock(flags); - } - - static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) - { -- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); -+ struct uart_port *port = &up->port; -+ unsigned int flags; -+ unsigned int ier; -+ -+ if (uart_console(port)) -+ console_atomic_lock(&flags); -+ -+ ier = serial_in(up, UART_IER); -+ serial_out(up, UART_IER, ier | mask); -+ -+ if (uart_console(port)) -+ console_atomic_unlock(flags); - } - - static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) diff --git a/kernel/patches-5.4.x-rt/0056-0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch b/kernel/patches-5.4.x-rt/0056-0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch deleted file mode 100644 index 9d8dbcd73..000000000 --- a/kernel/patches-5.4.x-rt/0056-0001-locking-percpu-rwsem-lockdep-Make-percpu-rwsem-use-i.patch +++ /dev/null @@ -1,217 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 31 Jan 2020 16:07:04 +0100 -Subject: [PATCH 1/7] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its - own lockdep_map - -As preparation for replacing the embedded rwsem, give percpu-rwsem its -own lockdep_map. 
- -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-rwsem.h | 29 +++++++++++++++++++---------- - kernel/cpu.c | 4 ++-- - kernel/locking/percpu-rwsem.c | 16 ++++++++++++---- - kernel/locking/rwsem.c | 4 ++-- - kernel/locking/rwsem.h | 2 ++ - 5 files changed, 37 insertions(+), 18 deletions(-) - ---- a/include/linux/percpu-rwsem.h -+++ b/include/linux/percpu-rwsem.h -@@ -15,8 +15,17 @@ struct percpu_rw_semaphore { - struct rw_semaphore rw_sem; /* slowpath */ - struct rcuwait writer; /* blocked writer */ - int readers_block; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ struct lockdep_map dep_map; -+#endif - }; - -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }, -+#else -+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) -+#endif -+ - #define __DEFINE_PERCPU_RWSEM(name, is_static) \ - static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \ - is_static struct percpu_rw_semaphore name = { \ -@@ -24,7 +33,9 @@ is_static struct percpu_rw_semaphore nam - .read_count = &__percpu_rwsem_rc_##name, \ - .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __RCUWAIT_INITIALIZER(name.writer), \ -+ __PERCPU_RWSEM_DEP_MAP_INIT(name) \ - } -+ - #define DEFINE_PERCPU_RWSEM(name) \ - __DEFINE_PERCPU_RWSEM(name, /* not static */) - #define DEFINE_STATIC_PERCPU_RWSEM(name) \ -@@ -37,7 +48,7 @@ static inline void percpu_down_read(stru - { - might_sleep(); - -- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_); -+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); - - preempt_disable(); - /* -@@ -76,13 +87,15 @@ static inline int percpu_down_read_trylo - */ - - if (ret) -- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_); -+ rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); - - return ret; - } - - static inline void percpu_up_read(struct percpu_rw_semaphore *sem) - { -+ rwsem_release(&sem->dep_map, 1, _RET_IP_); -+ - preempt_disable(); - /* - * Same as in percpu_down_read(). 
-@@ -92,8 +105,6 @@ static inline void percpu_up_read(struct - else - __percpu_up_read(sem); /* Unconditional memory barrier */ - preempt_enable(); -- -- rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); - } - - extern void percpu_down_write(struct percpu_rw_semaphore *); -@@ -110,15 +121,13 @@ extern void percpu_free_rwsem(struct per - __percpu_init_rwsem(sem, #sem, &rwsem_key); \ - }) - --#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem) -- --#define percpu_rwsem_assert_held(sem) \ -- lockdep_assert_held(&(sem)->rw_sem) -+#define percpu_rwsem_is_held(sem) lockdep_is_held(sem) -+#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem) - - static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) - { -- lock_release(&sem->rw_sem.dep_map, 1, ip); -+ lock_release(&sem->dep_map, 1, ip); - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); -@@ -128,7 +137,7 @@ static inline void percpu_rwsem_release( - static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) - { -- lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); -+ lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); - #ifdef CONFIG_RWSEM_SPIN_ON_OWNER - if (!read) - atomic_long_set(&sem->rw_sem.owner, (long)current); ---- a/kernel/cpu.c -+++ b/kernel/cpu.c -@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void) - - static void lockdep_acquire_cpus_lock(void) - { -- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_); -+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); - } - - static void lockdep_release_cpus_lock(void) - { -- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_); -+ rwsem_release(&cpu_hotplug_lock.dep_map, 1, _THIS_IP_); - } - - /* ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -11,7 +11,7 @@ - #include "rwsem.h" - - int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, -- const char *name, struct lock_class_key *rwsem_key) -+ const char *name, struct lock_class_key *key) - { - sem->read_count = alloc_percpu(int); - if (unlikely(!sem->read_count)) -@@ -19,9 +19,13 @@ int __percpu_init_rwsem(struct percpu_rw - - /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - rcu_sync_init(&sem->rss); -- __init_rwsem(&sem->rw_sem, name, rwsem_key); -+ init_rwsem(&sem->rw_sem); - rcuwait_init(&sem->writer); - sem->readers_block = 0; -+#ifdef CONFIG_DEBUG_LOCK_ALLOC -+ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); -+ lockdep_init_map(&sem->dep_map, name, key, 0); -+#endif - return 0; - } - EXPORT_SYMBOL_GPL(__percpu_init_rwsem); -@@ -142,10 +146,12 @@ static bool readers_active_check(struct - - void percpu_down_write(struct percpu_rw_semaphore *sem) - { -+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); -+ - /* Notify readers to take the slow path. */ - rcu_sync_enter(&sem->rss); - -- down_write(&sem->rw_sem); -+ __down_write(&sem->rw_sem); - - /* - * Notify new readers to block; up until now, and thus throughout the -@@ -168,6 +174,8 @@ EXPORT_SYMBOL_GPL(percpu_down_write); - - void percpu_up_write(struct percpu_rw_semaphore *sem) - { -+ rwsem_release(&sem->dep_map, 1, _RET_IP_); -+ - /* - * Signal the writer is done, no fast path yet. - * -@@ -183,7 +191,7 @@ void percpu_up_write(struct percpu_rw_se - /* - * Release the write lock, this will allow readers back in the game. 
- */ -- up_write(&sem->rw_sem); -+ __up_write(&sem->rw_sem); - - /* - * Once this completes (at least one RCU-sched grace period hence) the ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -1383,7 +1383,7 @@ static inline int __down_read_trylock(st - /* - * lock for writing - */ --static inline void __down_write(struct rw_semaphore *sem) -+inline void __down_write(struct rw_semaphore *sem) - { - long tmp = RWSEM_UNLOCKED_VALUE; - -@@ -1446,7 +1446,7 @@ inline void __up_read(struct rw_semaphor - /* - * unlock after writing - */ --static inline void __up_write(struct rw_semaphore *sem) -+inline void __up_write(struct rw_semaphore *sem) - { - long tmp; - ---- a/kernel/locking/rwsem.h -+++ b/kernel/locking/rwsem.h -@@ -6,5 +6,7 @@ - - extern void __down_read(struct rw_semaphore *sem); - extern void __up_read(struct rw_semaphore *sem); -+extern void __down_write(struct rw_semaphore *sem); -+extern void __up_write(struct rw_semaphore *sem); - - #endif /* __INTERNAL_RWSEM_H */ diff --git a/kernel/patches-5.4.x-rt/0057-0002-locking-percpu-rwsem-Convert-to-bool.patch b/kernel/patches-5.4.x-rt/0057-0002-locking-percpu-rwsem-Convert-to-bool.patch deleted file mode 100644 index 46ff3d02a..000000000 --- a/kernel/patches-5.4.x-rt/0057-0002-locking-percpu-rwsem-Convert-to-bool.patch +++ /dev/null @@ -1,75 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 31 Jan 2020 16:07:05 +0100 -Subject: [PATCH 2/7] locking/percpu-rwsem: Convert to bool - -Use bool where possible. - -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-rwsem.h | 6 +++--- - kernel/locking/percpu-rwsem.c | 8 ++++---- - 2 files changed, 7 insertions(+), 7 deletions(-) - ---- a/include/linux/percpu-rwsem.h -+++ b/include/linux/percpu-rwsem.h -@@ -41,7 +41,7 @@ is_static struct percpu_rw_semaphore nam - #define DEFINE_STATIC_PERCPU_RWSEM(name) \ - __DEFINE_PERCPU_RWSEM(name, static) - --extern int __percpu_down_read(struct percpu_rw_semaphore *, int); -+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); - extern void __percpu_up_read(struct percpu_rw_semaphore *); - - static inline void percpu_down_read(struct percpu_rw_semaphore *sem) -@@ -69,9 +69,9 @@ static inline void percpu_down_read(stru - preempt_enable(); - } - --static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) -+static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) - { -- int ret = 1; -+ bool ret = true; - - preempt_disable(); - /* ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_ - } - EXPORT_SYMBOL_GPL(percpu_free_rwsem); - --int __percpu_down_read(struct percpu_rw_semaphore *sem, int try) -+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) - { - /* - * Due to having preemption disabled the decrement happens on -@@ -69,7 +69,7 @@ int __percpu_down_read(struct percpu_rw_ - * release in percpu_up_write(). 
- */ - if (likely(!smp_load_acquire(&sem->readers_block))) -- return 1; -+ return true; - - /* - * Per the above comment; we still have preemption disabled and -@@ -78,7 +78,7 @@ int __percpu_down_read(struct percpu_rw_ - __percpu_up_read(sem); - - if (try) -- return 0; -+ return false; - - /* - * We either call schedule() in the wait, or we'll fall through -@@ -94,7 +94,7 @@ int __percpu_down_read(struct percpu_rw_ - __up_read(&sem->rw_sem); - - preempt_disable(); -- return 1; -+ return true; - } - EXPORT_SYMBOL_GPL(__percpu_down_read); - diff --git a/kernel/patches-5.4.x-rt/0058-0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch b/kernel/patches-5.4.x-rt/0058-0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch deleted file mode 100644 index e9f0e0e5f..000000000 --- a/kernel/patches-5.4.x-rt/0058-0003-locking-percpu-rwsem-Move-__this_cpu_inc-into-the-sl.patch +++ /dev/null @@ -1,53 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 31 Jan 2020 16:07:06 +0100 -Subject: [PATCH 3/7] locking/percpu-rwsem: Move __this_cpu_inc() into the - slowpath - -As preparation to rework __percpu_down_read() move the -__this_cpu_inc() into it. - -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-rwsem.h | 10 ++++++---- - kernel/locking/percpu-rwsem.c | 2 ++ - 2 files changed, 8 insertions(+), 4 deletions(-) - ---- a/include/linux/percpu-rwsem.h -+++ b/include/linux/percpu-rwsem.h -@@ -59,8 +59,9 @@ static inline void percpu_down_read(stru - * and that once the synchronize_rcu() is done, the writer will see - * anything we did within this RCU-sched read-size critical section. - */ -- __this_cpu_inc(*sem->read_count); -- if (unlikely(!rcu_sync_is_idle(&sem->rss))) -+ if (likely(rcu_sync_is_idle(&sem->rss))) -+ __this_cpu_inc(*sem->read_count); -+ else - __percpu_down_read(sem, false); /* Unconditional memory barrier */ - /* - * The preempt_enable() prevents the compiler from -@@ -77,8 +78,9 @@ static inline bool percpu_down_read_tryl - /* - * Same as in percpu_down_read(). - */ -- __this_cpu_inc(*sem->read_count); -- if (unlikely(!rcu_sync_is_idle(&sem->rss))) -+ if (likely(rcu_sync_is_idle(&sem->rss))) -+ __this_cpu_inc(*sem->read_count); -+ else - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ - preempt_enable(); - /* ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -47,6 +47,8 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem); - - bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) - { -+ __this_cpu_inc(*sem->read_count); -+ - /* - * Due to having preemption disabled the decrement happens on - * the same CPU as the increment, avoiding the diff --git a/kernel/patches-5.4.x-rt/0059-0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch b/kernel/patches-5.4.x-rt/0059-0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch deleted file mode 100644 index 70ac7a9b5..000000000 --- a/kernel/patches-5.4.x-rt/0059-0004-locking-percpu-rwsem-Extract-__percpu_down_read_tryl.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 31 Jan 2020 16:07:07 +0100 -Subject: [PATCH 4/7] locking/percpu-rwsem: Extract - __percpu_down_read_trylock() - -In preparation for removing the embedded rwsem and building a custom -lock, extract the read-trylock primitive. 
- -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/percpu-rwsem.c | 19 +++++++++++++------ - 1 file changed, 13 insertions(+), 6 deletions(-) - ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_ - } - EXPORT_SYMBOL_GPL(percpu_free_rwsem); - --bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) -+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) - { - __this_cpu_inc(*sem->read_count); - -@@ -73,11 +73,18 @@ bool __percpu_down_read(struct percpu_rw - if (likely(!smp_load_acquire(&sem->readers_block))) - return true; - -- /* -- * Per the above comment; we still have preemption disabled and -- * will thus decrement on the same CPU as we incremented. -- */ -- __percpu_up_read(sem); -+ __this_cpu_dec(*sem->read_count); -+ -+ /* Prod writer to re-evaluate readers_active_check() */ -+ rcuwait_wake_up(&sem->writer); -+ -+ return false; -+} -+ -+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) -+{ -+ if (__percpu_down_read_trylock(sem)) -+ return true; - - if (try) - return false; diff --git a/kernel/patches-5.4.x-rt/0060-0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch b/kernel/patches-5.4.x-rt/0060-0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch deleted file mode 100644 index 1a8ef0e35..000000000 --- a/kernel/patches-5.4.x-rt/0060-0005-locking-percpu-rwsem-Remove-the-embedded-rwsem.patch +++ /dev/null @@ -1,433 +0,0 @@ -From: Peter Zijlstra -Date: Fri, 31 Jan 2020 16:07:08 +0100 -Subject: [PATCH 5/7] locking/percpu-rwsem: Remove the embedded rwsem - -The filesystem freezer uses percpu-rwsem in a way that is effectively -write_non_owner() and achieves this with a few horrible hacks that -rely on the rwsem (!percpu) implementation. - -When PREEMPT_RT replaces the rwsem implementation with a PI aware -variant this comes apart. - -Remove the embedded rwsem and implement it using a waitqueue and an -atomic_t. - - - make readers_block an atomic, and use it, with the waitqueue - for a blocking test-and-set write-side. - - - have the read-side wait for the 'lock' state to clear. - -Have the waiters use FIFO queueing and mark them (reader/writer) with -a new WQ_FLAG. Use a custom wake_function to wake either a single -writer or all readers until a writer. 
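A compressed userspace model of the lock word this builds (C11 atomics; a
single counter stands in for the per-CPU reader counts, and the FIFO waitqueue
is elided):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct model_rwsem {
        atomic_int block;        /* 1 while a writer holds or wants the lock */
        atomic_int read_count;   /* stand-in for the per-CPU reader counts */
    };

    /* Writer-writer exclusion is a test-and-set on ->block. */
    static bool write_trylock(struct model_rwsem *sem)
    {
        int expected = 0;

        return atomic_compare_exchange_strong(&sem->block, &expected, 1);
    }

    /* Readers increment first, then back out if a writer got there. */
    static bool read_trylock(struct model_rwsem *sem)
    {
        atomic_fetch_add(&sem->read_count, 1);
        if (!atomic_load(&sem->block))
            return true;                  /* fast path */
        atomic_fetch_sub(&sem->read_count, 1);
        return false;                     /* real code queues FIFO and sleeps */
    }

Failed lockers then queue FIFO on the waitqueue, and the custom wake function
wakes readers in order until it has woken one writer.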
- -Signed-off-by: Peter Zijlstra (Intel) -Tested-by: Juri Lelli -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-rwsem.h | 19 +---- - include/linux/rwsem.h | 6 - - include/linux/wait.h | 1 - kernel/locking/percpu-rwsem.c | 153 ++++++++++++++++++++++++++++++------------ - kernel/locking/rwsem.c | 11 +-- - kernel/locking/rwsem.h | 12 --- - 6 files changed, 123 insertions(+), 79 deletions(-) - ---- a/include/linux/percpu-rwsem.h -+++ b/include/linux/percpu-rwsem.h -@@ -3,18 +3,18 @@ - #define _LINUX_PERCPU_RWSEM_H - - #include --#include - #include - #include -+#include - #include - #include - - struct percpu_rw_semaphore { - struct rcu_sync rss; - unsigned int __percpu *read_count; -- struct rw_semaphore rw_sem; /* slowpath */ -- struct rcuwait writer; /* blocked writer */ -- int readers_block; -+ struct rcuwait writer; -+ wait_queue_head_t waiters; -+ atomic_t block; - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; - #endif -@@ -31,8 +31,9 @@ static DEFINE_PER_CPU(unsigned int, __pe - is_static struct percpu_rw_semaphore name = { \ - .rss = __RCU_SYNC_INITIALIZER(name.rss), \ - .read_count = &__percpu_rwsem_rc_##name, \ -- .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ - .writer = __RCUWAIT_INITIALIZER(name.writer), \ -+ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \ -+ .block = ATOMIC_INIT(0), \ - __PERCPU_RWSEM_DEP_MAP_INIT(name) \ - } - -@@ -130,20 +131,12 @@ static inline void percpu_rwsem_release( - bool read, unsigned long ip) - { - lock_release(&sem->dep_map, 1, ip); --#ifdef CONFIG_RWSEM_SPIN_ON_OWNER -- if (!read) -- atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN); --#endif - } - - static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, - bool read, unsigned long ip) - { - lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip); --#ifdef CONFIG_RWSEM_SPIN_ON_OWNER -- if (!read) -- atomic_long_set(&sem->rw_sem.owner, (long)current); --#endif - } - - #endif ---- a/include/linux/rwsem.h -+++ b/include/linux/rwsem.h -@@ -53,12 +53,6 @@ struct rw_semaphore { - #endif - }; - --/* -- * Setting all bits of the owner field except bit 0 will indicate -- * that the rwsem is writer-owned with an unknown owner. 
-- */ --#define RWSEM_OWNER_UNKNOWN (-2L) -- - /* In all implementations count != 0 means locked */ - static inline int rwsem_is_locked(struct rw_semaphore *sem) - { ---- a/include/linux/wait.h -+++ b/include/linux/wait.h -@@ -20,6 +20,7 @@ int default_wake_function(struct wait_qu - #define WQ_FLAG_EXCLUSIVE 0x01 - #define WQ_FLAG_WOKEN 0x02 - #define WQ_FLAG_BOOKMARK 0x04 -+#define WQ_FLAG_CUSTOM 0x08 - - /* - * A single wait-queue entry structure: ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -1,15 +1,14 @@ - // SPDX-License-Identifier: GPL-2.0-only - #include --#include - #include -+#include - #include - #include - #include - #include -+#include - #include - --#include "rwsem.h" -- - int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, - const char *name, struct lock_class_key *key) - { -@@ -17,11 +16,10 @@ int __percpu_init_rwsem(struct percpu_rw - if (unlikely(!sem->read_count)) - return -ENOMEM; - -- /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ - rcu_sync_init(&sem->rss); -- init_rwsem(&sem->rw_sem); - rcuwait_init(&sem->writer); -- sem->readers_block = 0; -+ init_waitqueue_head(&sem->waiters); -+ atomic_set(&sem->block, 0); - #ifdef CONFIG_DEBUG_LOCK_ALLOC - debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key, 0); -@@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s - * the same CPU as the increment, avoiding the - * increment-on-one-CPU-and-decrement-on-another problem. - * -- * If the reader misses the writer's assignment of readers_block, then -- * the writer is guaranteed to see the reader's increment. -+ * If the reader misses the writer's assignment of sem->block, then the -+ * writer is guaranteed to see the reader's increment. - * - * Conversely, any readers that increment their sem->read_count after -- * the writer looks are guaranteed to see the readers_block value, -- * which in turn means that they are guaranteed to immediately -- * decrement their sem->read_count, so that it doesn't matter that the -- * writer missed them. -+ * the writer looks are guaranteed to see the sem->block value, which -+ * in turn means that they are guaranteed to immediately decrement -+ * their sem->read_count, so that it doesn't matter that the writer -+ * missed them. - */ - - smp_mb(); /* A matches D */ - - /* -- * If !readers_block the critical section starts here, matched by the -+ * If !sem->block the critical section starts here, matched by the - * release in percpu_up_write(). - */ -- if (likely(!smp_load_acquire(&sem->readers_block))) -+ if (likely(!atomic_read_acquire(&sem->block))) - return true; - - __this_cpu_dec(*sem->read_count); -@@ -81,6 +79,88 @@ static bool __percpu_down_read_trylock(s - return false; - } - -+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem) -+{ -+ if (atomic_read(&sem->block)) -+ return false; -+ -+ return atomic_xchg(&sem->block, 1) == 0; -+} -+ -+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader) -+{ -+ if (reader) { -+ bool ret; -+ -+ preempt_disable(); -+ ret = __percpu_down_read_trylock(sem); -+ preempt_enable(); -+ -+ return ret; -+ } -+ return __percpu_down_write_trylock(sem); -+} -+ -+/* -+ * The return value of wait_queue_entry::func means: -+ * -+ * <0 - error, wakeup is terminated and the error is returned -+ * 0 - no wakeup, a next waiter is tried -+ * >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive. 
-+ * -+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order, -+ * and play games with the return value to allow waking multiple readers. -+ * -+ * Specifically, we wake readers until we've woken a single writer, or until a -+ * trylock fails. -+ */ -+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry, -+ unsigned int mode, int wake_flags, -+ void *key) -+{ -+ struct task_struct *p = get_task_struct(wq_entry->private); -+ bool reader = wq_entry->flags & WQ_FLAG_CUSTOM; -+ struct percpu_rw_semaphore *sem = key; -+ -+ /* concurrent against percpu_down_write(), can get stolen */ -+ if (!__percpu_rwsem_trylock(sem, reader)) -+ return 1; -+ -+ list_del_init(&wq_entry->entry); -+ smp_store_release(&wq_entry->private, NULL); -+ -+ wake_up_process(p); -+ put_task_struct(p); -+ -+ return !reader; /* wake (readers until) 1 writer */ -+} -+ -+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader) -+{ -+ DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function); -+ bool wait; -+ -+ spin_lock_irq(&sem->waiters.lock); -+ /* -+ * Serialize against the wakeup in percpu_up_write(), if we fail -+ * the trylock, the wakeup must see us on the list. -+ */ -+ wait = !__percpu_rwsem_trylock(sem, reader); -+ if (wait) { -+ wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM; -+ __add_wait_queue_entry_tail(&sem->waiters, &wq_entry); -+ } -+ spin_unlock_irq(&sem->waiters.lock); -+ -+ while (wait) { -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ if (!smp_load_acquire(&wq_entry.private)) -+ break; -+ schedule(); -+ } -+ __set_current_state(TASK_RUNNING); -+} -+ - bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) - { - if (__percpu_down_read_trylock(sem)) -@@ -89,20 +169,10 @@ bool __percpu_down_read(struct percpu_rw - if (try) - return false; - -- /* -- * We either call schedule() in the wait, or we'll fall through -- * and reschedule on the preempt_enable() in percpu_down_read(). -- */ -- preempt_enable_no_resched(); -- -- /* -- * Avoid lockdep for the down/up_read() we already have them. -- */ -- __down_read(&sem->rw_sem); -- this_cpu_inc(*sem->read_count); -- __up_read(&sem->rw_sem); -- -+ preempt_enable(); -+ percpu_rwsem_wait(sem, /* .reader = */ true); - preempt_disable(); -+ - return true; - } - EXPORT_SYMBOL_GPL(__percpu_down_read); -@@ -117,7 +187,7 @@ void __percpu_up_read(struct percpu_rw_s - */ - __this_cpu_dec(*sem->read_count); - -- /* Prod writer to recheck readers_active */ -+ /* Prod writer to re-evaluate readers_active_check() */ - rcuwait_wake_up(&sem->writer); - } - EXPORT_SYMBOL_GPL(__percpu_up_read); -@@ -137,6 +207,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read); - * zero. If this sum is zero, then it is stable due to the fact that if any - * newly arriving readers increment a given counter, they will immediately - * decrement that same counter. -+ * -+ * Assumes sem->block is set. - */ - static bool readers_active_check(struct percpu_rw_semaphore *sem) - { -@@ -160,23 +232,22 @@ void percpu_down_write(struct percpu_rw_ - /* Notify readers to take the slow path. */ - rcu_sync_enter(&sem->rss); - -- __down_write(&sem->rw_sem); -- - /* -- * Notify new readers to block; up until now, and thus throughout the -- * longish rcu_sync_enter() above, new readers could still come in. -+ * Try set sem->block; this provides writer-writer exclusion. -+ * Having sem->block set makes new readers block. 
- */ -- WRITE_ONCE(sem->readers_block, 1); -+ if (!__percpu_down_write_trylock(sem)) -+ percpu_rwsem_wait(sem, /* .reader = */ false); - -- smp_mb(); /* D matches A */ -+ /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */ - - /* -- * If they don't see our writer of readers_block, then we are -- * guaranteed to see their sem->read_count increment, and therefore -- * will wait for them. -+ * If they don't see our store of sem->block, then we are guaranteed to -+ * see their sem->read_count increment, and therefore will wait for -+ * them. - */ - -- /* Wait for all now active readers to complete. */ -+ /* Wait for all active readers to complete. */ - rcuwait_wait_event(&sem->writer, readers_active_check(sem)); - } - EXPORT_SYMBOL_GPL(percpu_down_write); -@@ -195,12 +266,12 @@ void percpu_up_write(struct percpu_rw_se - * Therefore we force it through the slow path which guarantees an - * acquire and thereby guarantees the critical section's consistency. - */ -- smp_store_release(&sem->readers_block, 0); -+ atomic_set_release(&sem->block, 0); - - /* -- * Release the write lock, this will allow readers back in the game. -+ * Prod any pending reader/writer to make progress. - */ -- __up_write(&sem->rw_sem); -+ __wake_up(&sem->waiters, TASK_NORMAL, 1, sem); - - /* - * Once this completes (at least one RCU-sched grace period hence) the ---- a/kernel/locking/rwsem.c -+++ b/kernel/locking/rwsem.c -@@ -28,7 +28,6 @@ - #include - #include - --#include "rwsem.h" - #include "lock_events.h" - - /* -@@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_own - unsigned long flags; - bool ret = true; - -- BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE)); -- - if (need_resched()) { - lockevent_inc(rwsem_opt_fail); - return false; -@@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgr - /* - * lock for reading - */ --inline void __down_read(struct rw_semaphore *sem) -+static inline void __down_read(struct rw_semaphore *sem) - { - if (!rwsem_read_trylock(sem)) { - rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE); -@@ -1383,7 +1380,7 @@ static inline int __down_read_trylock(st - /* - * lock for writing - */ --inline void __down_write(struct rw_semaphore *sem) -+static inline void __down_write(struct rw_semaphore *sem) - { - long tmp = RWSEM_UNLOCKED_VALUE; - -@@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(s - /* - * unlock after reading - */ --inline void __up_read(struct rw_semaphore *sem) -+static inline void __up_read(struct rw_semaphore *sem) - { - long tmp; - -@@ -1446,7 +1443,7 @@ inline void __up_read(struct rw_semaphor - /* - * unlock after writing - */ --inline void __up_write(struct rw_semaphore *sem) -+static inline void __up_write(struct rw_semaphore *sem) - { - long tmp; - ---- a/kernel/locking/rwsem.h -+++ b/kernel/locking/rwsem.h -@@ -1,12 +0,0 @@ --/* SPDX-License-Identifier: GPL-2.0 */ -- --#ifndef __INTERNAL_RWSEM_H --#define __INTERNAL_RWSEM_H --#include -- --extern void __down_read(struct rw_semaphore *sem); --extern void __up_read(struct rw_semaphore *sem); --extern void __down_write(struct rw_semaphore *sem); --extern void __up_write(struct rw_semaphore *sem); -- --#endif /* __INTERNAL_RWSEM_H */ diff --git a/kernel/patches-5.4.x-rt/0061-0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch b/kernel/patches-5.4.x-rt/0061-0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch deleted file mode 100644 index a89d99f6e..000000000 --- a/kernel/patches-5.4.x-rt/0061-0006-locking-percpu-rwsem-Fold-__percpu_up_read.patch +++ 
/dev/null @@ -1,85 +0,0 @@ -From: Davidlohr Bueso -Date: Fri, 31 Jan 2020 16:07:09 +0100 -Subject: [PATCH 6/7] locking/percpu-rwsem: Fold __percpu_up_read() - -Now that __percpu_up_read() is only ever used from percpu_up_read() -merge them, it's a small function. - -Signed-off-by: Peter Zijlstra (Intel) -Signed-off-by: Sebastian Andrzej Siewior ---- - include/linux/percpu-rwsem.h | 19 +++++++++++++++---- - kernel/exit.c | 1 + - kernel/locking/percpu-rwsem.c | 15 --------------- - 3 files changed, 16 insertions(+), 19 deletions(-) - ---- a/include/linux/percpu-rwsem.h -+++ b/include/linux/percpu-rwsem.h -@@ -43,7 +43,6 @@ is_static struct percpu_rw_semaphore nam - __DEFINE_PERCPU_RWSEM(name, static) - - extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); --extern void __percpu_up_read(struct percpu_rw_semaphore *); - - static inline void percpu_down_read(struct percpu_rw_semaphore *sem) - { -@@ -103,10 +102,22 @@ static inline void percpu_up_read(struct - /* - * Same as in percpu_down_read(). - */ -- if (likely(rcu_sync_is_idle(&sem->rss))) -+ if (likely(rcu_sync_is_idle(&sem->rss))) { - __this_cpu_dec(*sem->read_count); -- else -- __percpu_up_read(sem); /* Unconditional memory barrier */ -+ } else { -+ /* -+ * slowpath; reader will only ever wake a single blocked -+ * writer. -+ */ -+ smp_mb(); /* B matches C */ -+ /* -+ * In other words, if they see our decrement (presumably to -+ * aggregate zero, as that is the only time it matters) they -+ * will also see our critical section. -+ */ -+ __this_cpu_dec(*sem->read_count); -+ rcuwait_wake_up(&sem->writer); -+ } - preempt_enable(); - } - ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w) - wake_up_process(task); - rcu_read_unlock(); - } -+EXPORT_SYMBOL_GPL(rcuwait_wake_up); - - /* - * Determine if a process group is "orphaned", according to the POSIX ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -177,21 +177,6 @@ bool __percpu_down_read(struct percpu_rw - } - EXPORT_SYMBOL_GPL(__percpu_down_read); - --void __percpu_up_read(struct percpu_rw_semaphore *sem) --{ -- smp_mb(); /* B matches C */ -- /* -- * In other words, if they see our decrement (presumably to aggregate -- * zero, as that is the only time it matters) they will also see our -- * critical section. -- */ -- __this_cpu_dec(*sem->read_count); -- -- /* Prod writer to re-evaluate readers_active_check() */ -- rcuwait_wake_up(&sem->writer); --} --EXPORT_SYMBOL_GPL(__percpu_up_read); -- - #define per_cpu_sum(var) \ - ({ \ - typeof(var) __sum = 0; \ diff --git a/kernel/patches-5.4.x-rt/0062-0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch b/kernel/patches-5.4.x-rt/0062-0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch deleted file mode 100644 index 8f5e2a791..000000000 --- a/kernel/patches-5.4.x-rt/0062-0007-locking-percpu-rwsem-Add-might_sleep-for-writer-lock.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Davidlohr Bueso -Date: Fri, 31 Jan 2020 16:07:10 +0100 -Subject: [PATCH 7/7] locking/percpu-rwsem: Add might_sleep() for writer - locking - -We are missing this annotation in percpu_down_write(). Correct -this. 
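For context, might_sleep() is the standard way to make that contract explicit:
with CONFIG_DEBUG_ATOMIC_SLEEP it warns when a blocking primitive is entered
from atomic context, even on calls that happen not to sleep. In miniature:

    void blocking_primitive(void)
    {
        might_sleep();   /* warn early if the caller cannot sleep */
        /* fast path may return immediately; slow path blocks */
    }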
- -Signed-off-by: Davidlohr Bueso -Signed-off-by: Peter Zijlstra (Intel) -Link: https://lkml.kernel.org/r/20200108013305.7732-1-dave@stgolabs.net -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/locking/percpu-rwsem.c | 1 + - 1 file changed, 1 insertion(+) - ---- a/kernel/locking/percpu-rwsem.c -+++ b/kernel/locking/percpu-rwsem.c -@@ -212,6 +212,7 @@ static bool readers_active_check(struct - - void percpu_down_write(struct percpu_rw_semaphore *sem) - { -+ might_sleep(); - rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); - - /* Notify readers to take the slow path. */ diff --git a/kernel/patches-5.4.x-rt/0063-fs-buffer-Make-BH_Uptodate_Lock-bit_spin_lock-a-regu.patch b/kernel/patches-5.4.x-rt/0063-fs-buffer-Make-BH_Uptodate_Lock-bit_spin_lock-a-regu.patch deleted file mode 100644 index 17b26e49f..000000000 --- a/kernel/patches-5.4.x-rt/0063-fs-buffer-Make-BH_Uptodate_Lock-bit_spin_lock-a-regu.patch +++ /dev/null @@ -1,192 +0,0 @@ -From: Thomas Gleixner -Date: Fri, 15 Nov 2019 18:54:20 +0100 -Subject: [PATCH] fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular - spinlock_t - -Bit spinlocks are problematic if PREEMPT_RT is enabled, because they -disable preemption, which is undesired for latency reasons and breaks when -regular spinlocks are taken within the bit_spinlock locked region because -regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT -replaces the bit spinlocks with regular spinlocks to avoid this problem. -Bit spinlocks are also not covered by lock debugging, e.g. lockdep. - -Substitute the BH_Uptodate_Lock bit spinlock with a regular spinlock. - -Reviewed-by: Jan Kara -Signed-off-by: Thomas Gleixner -[bigeasy: remove the wrapper and use always spinlock_t and move it into - the padding hole] -Signed-off-by: Sebastian Andrzej Siewior ---- -v2…v3: rename uptodate_lock to b_uptodate_lock. - -v1…v2: Move the spinlock_t to the padding hole as per Jan Kara. pahole says -its total size remained unchanged, before - -| atomic_t b_count; /* 96 4 */ -| -| /* size: 104, cachelines: 2, members: 12 */ -| /* padding: 4 */ -| /* last cacheline: 40 bytes */ - -after - -| atomic_t b_count; /* 96 4 */ -| spinlock_t uptodate_lock; /* 100 4 */ -| -| /* size: 104, cachelines: 2, members: 13 */ -| /* last cacheline: 40 bytes */ - - fs/buffer.c | 19 +++++++------------ - fs/ext4/page-io.c | 8 +++----- - fs/ntfs/aops.c | 9 +++------ - include/linux/buffer_head.h | 6 +++--- - 4 files changed, 16 insertions(+), 26 deletions(-) - ---- a/fs/buffer.c -+++ b/fs/buffer.c -@@ -275,8 +275,7 @@ static void end_buffer_async_read(struct - * decide that the page is now completely done. 
- */ - first = page_buffers(page); -- local_irq_save(flags); -- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); -+ spin_lock_irqsave(&first->b_uptodate_lock, flags); - clear_buffer_async_read(bh); - unlock_buffer(bh); - tmp = bh; -@@ -289,8 +288,7 @@ static void end_buffer_async_read(struct - } - tmp = tmp->b_this_page; - } while (tmp != bh); -- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - - /* - * If none of the buffers had errors and they are all -@@ -302,8 +300,7 @@ static void end_buffer_async_read(struct - return; - - still_busy: -- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; - } - -@@ -331,8 +328,7 @@ void end_buffer_async_write(struct buffe - } - - first = page_buffers(page); -- local_irq_save(flags); -- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); -+ spin_lock_irqsave(&first->b_uptodate_lock, flags); - - clear_buffer_async_write(bh); - unlock_buffer(bh); -@@ -344,14 +340,12 @@ void end_buffer_async_write(struct buffe - } - tmp = tmp->b_this_page; - } -- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - end_page_writeback(page); - return; - - still_busy: -- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - return; - } - EXPORT_SYMBOL(end_buffer_async_write); -@@ -3345,6 +3339,7 @@ struct buffer_head *alloc_buffer_head(gf - struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); - if (ret) { - INIT_LIST_HEAD(&ret->b_assoc_buffers); -+ spin_lock_init(&ret->b_uptodate_lock); - preempt_disable(); - __this_cpu_inc(bh_accounting.nr); - recalc_bh_state(); ---- a/fs/ext4/page-io.c -+++ b/fs/ext4/page-io.c -@@ -87,11 +87,10 @@ static void ext4_finish_bio(struct bio * - } - bh = head = page_buffers(page); - /* -- * We check all buffers in the page under BH_Uptodate_Lock -+ * We check all buffers in the page under b_uptodate_lock - * to avoid races with other end io clearing async_write flags - */ -- local_irq_save(flags); -- bit_spin_lock(BH_Uptodate_Lock, &head->b_state); -+ spin_lock_irqsave(&head->b_uptodate_lock, flags); - do { - if (bh_offset(bh) < bio_start || - bh_offset(bh) + bh->b_size > bio_end) { -@@ -103,8 +102,7 @@ static void ext4_finish_bio(struct bio * - if (bio->bi_status) - buffer_io_error(bh); - } while ((bh = bh->b_this_page) != head); -- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&head->b_uptodate_lock, flags); - if (!under_io) { - fscrypt_free_bounce_page(bounce_page); - end_page_writeback(page); ---- a/fs/ntfs/aops.c -+++ b/fs/ntfs/aops.c -@@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(s - "0x%llx.", (unsigned long long)bh->b_blocknr); - } - first = page_buffers(page); -- local_irq_save(flags); -- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); -+ spin_lock_irqsave(&first->b_uptodate_lock, flags); - clear_buffer_async_read(bh); - unlock_buffer(bh); - tmp = bh; -@@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(s - } - tmp = tmp->b_this_page; - } while (tmp != bh); -- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); -- local_irq_restore(flags); -+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); - /* - * If none of the buffers had errors then we can set the page uptodate, - * but 
we first have to perform the post read mst fixups, if the
-@@ -142,8 +140,7 @@ static void ntfs_end_buffer_async_read(s
- unlock_page(page);
- return;
- still_busy:
-- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-- local_irq_restore(flags);
-+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
- return;
- }
-
---- a/include/linux/buffer_head.h
-+++ b/include/linux/buffer_head.h
-@@ -22,9 +22,6 @@ enum bh_state_bits {
- BH_Dirty, /* Is dirty */
- BH_Lock, /* Is locked */
- BH_Req, /* Has been submitted for I/O */
-- BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise
-- * IO completion of other buffers in the page
-- */
-
- BH_Mapped, /* Has a disk mapping */
- BH_New, /* Disk mapping was newly created by get_block */
-@@ -76,6 +73,9 @@ struct buffer_head {
- struct address_space *b_assoc_map; /* mapping this buffer is
- associated with */
- atomic_t b_count; /* users using this buffer_head */
-+ spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to
-+ * serialise IO completion of other
-+ * buffers in the page */
- };
-
- /*
diff --git a/kernel/patches-5.4.x-rt/0064-thermal-x86_pkg_temp-make-pkg_temp_lock-a-raw-spinlo.patch b/kernel/patches-5.4.x-rt/0064-thermal-x86_pkg_temp-make-pkg_temp_lock-a-raw-spinlo.patch
deleted file mode 100644
index 7f544dae5..000000000
--- a/kernel/patches-5.4.x-rt/0064-thermal-x86_pkg_temp-make-pkg_temp_lock-a-raw-spinlo.patch
+++ /dev/null
@@ -1,109 +0,0 @@
-From: Clark Williams
-Date: Mon, 15 Jul 2019 15:25:00 -0500
-Subject: [PATCH] thermal/x86_pkg_temp: Make pkg_temp_lock a raw_spinlock_t
-
-The spinlock pkg_temp_lock has the potential of being taken in atomic
-context because it can be acquired from the thermal IRQ vector.
-It's static and limited in scope, so go ahead and make it a raw spinlock.
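A minimal sketch of the conversion idiom, with an invented lock name standing in for pkg_temp_lock: a raw_spinlock_t keeps spinning even on PREEMPT_RT, so it remains safe to take from the interrupt path, and each spin_lock_*() call becomes its raw_spin_lock_*() counterpart one for one.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);	/* hypothetical */
static unsigned int demo_events;

/* IRQ path: may run in hard interrupt context, even on RT */
static void demo_handle_irq(void)
{
	raw_spin_lock(&demo_lock);
	demo_events++;
	raw_spin_unlock(&demo_lock);
}

/* task path: keep IRQs off while holding the lock to avoid deadlock */
static void demo_reset(void)
{
	raw_spin_lock_irq(&demo_lock);
	demo_events = 0;
	raw_spin_unlock_irq(&demo_lock);
}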
- -Signed-off-by: Clark Williams -Signed-off-by: Sebastian Andrzej Siewior ---- - drivers/thermal/intel/x86_pkg_temp_thermal.c | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - ---- a/drivers/thermal/intel/x86_pkg_temp_thermal.c -+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c -@@ -63,7 +63,7 @@ static int max_id __read_mostly; - /* Array of zone pointers */ - static struct zone_device **zones; - /* Serializes interrupt notification, work and hotplug */ --static DEFINE_SPINLOCK(pkg_temp_lock); -+static DEFINE_RAW_SPINLOCK(pkg_temp_lock); - /* Protects zone operation in the work function against hotplug removal */ - static DEFINE_MUTEX(thermal_zone_mutex); - -@@ -266,12 +266,12 @@ static void pkg_temp_thermal_threshold_w - u64 msr_val, wr_val; - - mutex_lock(&thermal_zone_mutex); -- spin_lock_irq(&pkg_temp_lock); -+ raw_spin_lock_irq(&pkg_temp_lock); - ++pkg_work_cnt; - - zonedev = pkg_temp_thermal_get_dev(cpu); - if (!zonedev) { -- spin_unlock_irq(&pkg_temp_lock); -+ raw_spin_unlock_irq(&pkg_temp_lock); - mutex_unlock(&thermal_zone_mutex); - return; - } -@@ -285,7 +285,7 @@ static void pkg_temp_thermal_threshold_w - } - - enable_pkg_thres_interrupt(); -- spin_unlock_irq(&pkg_temp_lock); -+ raw_spin_unlock_irq(&pkg_temp_lock); - - /* - * If tzone is not NULL, then thermal_zone_mutex will prevent the -@@ -310,7 +310,7 @@ static int pkg_thermal_notify(u64 msr_va - struct zone_device *zonedev; - unsigned long flags; - -- spin_lock_irqsave(&pkg_temp_lock, flags); -+ raw_spin_lock_irqsave(&pkg_temp_lock, flags); - ++pkg_interrupt_cnt; - - disable_pkg_thres_interrupt(); -@@ -322,7 +322,7 @@ static int pkg_thermal_notify(u64 msr_va - pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work); - } - -- spin_unlock_irqrestore(&pkg_temp_lock, flags); -+ raw_spin_unlock_irqrestore(&pkg_temp_lock, flags); - return 0; - } - -@@ -368,9 +368,9 @@ static int pkg_temp_thermal_device_add(u - zonedev->msr_pkg_therm_high); - - cpumask_set_cpu(cpu, &zonedev->cpumask); -- spin_lock_irq(&pkg_temp_lock); -+ raw_spin_lock_irq(&pkg_temp_lock); - zones[id] = zonedev; -- spin_unlock_irq(&pkg_temp_lock); -+ raw_spin_unlock_irq(&pkg_temp_lock); - return 0; - } - -@@ -407,7 +407,7 @@ static int pkg_thermal_cpu_offline(unsig - } - - /* Protect against work and interrupts */ -- spin_lock_irq(&pkg_temp_lock); -+ raw_spin_lock_irq(&pkg_temp_lock); - - /* - * Check whether this cpu was the current target and store the new -@@ -439,9 +439,9 @@ static int pkg_thermal_cpu_offline(unsig - * To cancel the work we need to drop the lock, otherwise - * we might deadlock if the work needs to be flushed. 
- */
-- spin_unlock_irq(&pkg_temp_lock);
-+ raw_spin_unlock_irq(&pkg_temp_lock);
- cancel_delayed_work_sync(&zonedev->work);
-- spin_lock_irq(&pkg_temp_lock);
-+ raw_spin_lock_irq(&pkg_temp_lock);
- /*
- * If this is not the last cpu in the package and the work
- * did not run after we dropped the lock above, then we
-@@ -452,7 +452,7 @@ static int pkg_thermal_cpu_offline(unsig
- pkg_thermal_schedule_work(target, &zonedev->work);
- }
-
-- spin_unlock_irq(&pkg_temp_lock);
-+ raw_spin_unlock_irq(&pkg_temp_lock);
-
- /* Final cleanup if this is the last cpu */
- if (lastcpu)
diff --git a/kernel/patches-5.4.x-rt/0065-perf-core-Add-SRCU-annotation-for-pmus-list-walk.patch b/kernel/patches-5.4.x-rt/0065-perf-core-Add-SRCU-annotation-for-pmus-list-walk.patch
deleted file mode 100644
index 5632c3877..000000000
--- a/kernel/patches-5.4.x-rt/0065-perf-core-Add-SRCU-annotation-for-pmus-list-walk.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Fri, 15 Nov 2019 18:04:07 +0100
-Subject: [PATCH] perf/core: Add SRCU annotation for pmus list walk
-
-Since commit
- 28875945ba98d ("rcu: Add support for consolidated-RCU reader checking")
-
-there is an additional check to ensure that a RCU related lock is held
-while the RCU list is iterated.
-This section holds the SRCU reader lock instead.
-
-Add annotation to list_for_each_entry_rcu() that pmus_srcu must be
-acquired during the list traversal.
-
-Signed-off-by: Sebastian Andrzej Siewior
----
- kernel/events/core.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
----- a/kernel/events/core.c
-+++ b/kernel/events/core.c
-@@ -10264,7 +10264,7 @@ static struct pmu *perf_init_event(struc
- goto unlock;
- }
-
-- list_for_each_entry_rcu(pmu, &pmus, entry) {
-+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
- ret = perf_try_init_event(pmu, event);
- if (!ret)
- goto unlock;
diff --git a/kernel/patches-5.4.x-rt/0066-kmemleak-Turn-kmemleak_lock-and-object-lock-to-raw_s.patch b/kernel/patches-5.4.x-rt/0066-kmemleak-Turn-kmemleak_lock-and-object-lock-to-raw_s.patch
deleted file mode 100644
index 23039044e..000000000
--- a/kernel/patches-5.4.x-rt/0066-kmemleak-Turn-kmemleak_lock-and-object-lock-to-raw_s.patch
+++ /dev/null
@@ -1,411 +0,0 @@
-From: He Zhe
-Date: Wed, 19 Dec 2018 16:30:57 +0100
-Subject: [PATCH] kmemleak: Turn kmemleak_lock and object->lock to
- raw_spinlock_t
-
-kmemleak_lock as a rwlock on RT can possibly be acquired in atomic context
-which does not work on RT.
-Since the kmemleak operation is performed in atomic context make it a
-raw_spinlock_t so it can also be acquired on RT. This is used for
-debugging and is not enabled by default in a production like environment
-(where performance/latency matters) so it makes sense to make it a
-raw_spinlock_t instead of trying to get rid of the atomic context.
-Turn also the kmemleak_object->lock into raw_spinlock_t which is
-acquired (nested) while the kmemleak_lock is held.
-
-The time spent in "echo scan > kmemleak" slightly improved on 64core box
-with this patch applied after boot.
-
-Acked-by: Catalin Marinas
-Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com
-Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com
-Link: https://lkml.kernel.org/r/20190927082230.34152-1-yongxin.liu@windriver.com
-Signed-off-by: He Zhe
-Signed-off-by: Liu Haitao
-Signed-off-by: Yongxin Liu
-[bigeasy: Redo the description.
Merge the individual bits: He Zhe did -the kmemleak_lock, Liu Haitao the ->lock and Yongxin Liu forwarded the -patch.] -Signed-off-by: Sebastian Andrzej Siewior ---- - mm/kmemleak.c | 112 +++++++++++++++++++++++++++++----------------------------- - 1 file changed, 56 insertions(+), 56 deletions(-) - ---- a/mm/kmemleak.c -+++ b/mm/kmemleak.c -@@ -13,7 +13,7 @@ - * - * The following locks and mutexes are used by kmemleak: - * -- * - kmemleak_lock (rwlock): protects the object_list modifications and -+ * - kmemleak_lock (raw_spinlock_t): protects the object_list modifications and - * accesses to the object_tree_root. The object_list is the main list - * holding the metadata (struct kmemleak_object) for the allocated memory - * blocks. The object_tree_root is a red black tree used to look-up -@@ -22,13 +22,13 @@ - * object_tree_root in the create_object() function called from the - * kmemleak_alloc() callback and removed in delete_object() called from the - * kmemleak_free() callback -- * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to -- * the metadata (e.g. count) are protected by this lock. Note that some -- * members of this structure may be protected by other means (atomic or -- * kmemleak_lock). This lock is also held when scanning the corresponding -- * memory block to avoid the kernel freeing it via the kmemleak_free() -- * callback. This is less heavyweight than holding a global lock like -- * kmemleak_lock during scanning -+ * - kmemleak_object.lock (raw_spinlock_t): protects a kmemleak_object. -+ * Accesses to the metadata (e.g. count) are protected by this lock. Note -+ * that some members of this structure may be protected by other means -+ * (atomic or kmemleak_lock). This lock is also held when scanning the -+ * corresponding memory block to avoid the kernel freeing it via the -+ * kmemleak_free() callback. This is less heavyweight than holding a global -+ * lock like kmemleak_lock during scanning. - * - scan_mutex (mutex): ensures that only one thread may scan the memory for - * unreferenced objects at a time. The gray_list contains the objects which - * are already referenced or marked as false positives and need to be -@@ -135,7 +135,7 @@ struct kmemleak_scan_area { - * (use_count) and freed using the RCU mechanism. 
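/*
 * [Editor's aside, not patch content: the conversion is mechanical, as a
 * sketch with an invented helper shows. Every spin_lock_*() on object->lock
 * or kmemleak_lock becomes the raw_spin_lock_*() variant; the nesting order
 * (kmemleak_lock outer, object->lock inner) is unchanged.]
 */
static void sketch_touch_object(struct kmemleak_object *object)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&object->lock, flags);
	object->count++;	/* metadata is protected by object->lock */
	raw_spin_unlock_irqrestore(&object->lock, flags);
}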
- */ - struct kmemleak_object { -- spinlock_t lock; -+ raw_spinlock_t lock; - unsigned int flags; /* object status flags */ - struct list_head object_list; - struct list_head gray_list; -@@ -191,8 +191,8 @@ static int mem_pool_free_count = ARRAY_S - static LIST_HEAD(mem_pool_free_list); - /* search tree for object boundaries */ - static struct rb_root object_tree_root = RB_ROOT; --/* rw_lock protecting the access to object_list and object_tree_root */ --static DEFINE_RWLOCK(kmemleak_lock); -+/* protecting the access to object_list and object_tree_root */ -+static DEFINE_RAW_SPINLOCK(kmemleak_lock); - - /* allocation caches for kmemleak internal data */ - static struct kmem_cache *object_cache; -@@ -426,7 +426,7 @@ static struct kmemleak_object *mem_pool_ - } - - /* slab allocation failed, try the memory pool */ -- write_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - object = list_first_entry_or_null(&mem_pool_free_list, - typeof(*object), object_list); - if (object) -@@ -435,7 +435,7 @@ static struct kmemleak_object *mem_pool_ - object = &mem_pool[--mem_pool_free_count]; - else - pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n"); -- write_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - - return object; - } -@@ -453,9 +453,9 @@ static void mem_pool_free(struct kmemlea - } - - /* add the object to the memory pool free list */ -- write_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - list_add(&object->object_list, &mem_pool_free_list); -- write_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - } - - /* -@@ -514,9 +514,9 @@ static struct kmemleak_object *find_and_ - struct kmemleak_object *object; - - rcu_read_lock(); -- read_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, alias); -- read_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - - /* check whether the object is still available */ - if (object && !get_object(object)) -@@ -546,11 +546,11 @@ static struct kmemleak_object *find_and_ - unsigned long flags; - struct kmemleak_object *object; - -- write_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, alias); - if (object) - __remove_object(object); -- write_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - - return object; - } -@@ -585,7 +585,7 @@ static struct kmemleak_object *create_ob - INIT_LIST_HEAD(&object->object_list); - INIT_LIST_HEAD(&object->gray_list); - INIT_HLIST_HEAD(&object->area_list); -- spin_lock_init(&object->lock); -+ raw_spin_lock_init(&object->lock); - atomic_set(&object->use_count, 1); - object->flags = OBJECT_ALLOCATED; - object->pointer = ptr; -@@ -617,7 +617,7 @@ static struct kmemleak_object *create_ob - /* kernel backtrace */ - object->trace_len = __save_stack_trace(object->trace); - -- write_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - - untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); - min_addr = min(min_addr, untagged_ptr); -@@ -649,7 +649,7 @@ static struct kmemleak_object *create_ob - - list_add_tail_rcu(&object->object_list, &object_list); - out: -- write_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - return 
object; - } - -@@ -667,9 +667,9 @@ static void __delete_object(struct kmeml - * Locking here also ensures that the corresponding memory block - * cannot be freed when it is being scanned. - */ -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - object->flags &= ~OBJECT_ALLOCATED; -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - put_object(object); - } - -@@ -739,9 +739,9 @@ static void paint_it(struct kmemleak_obj - { - unsigned long flags; - -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - __paint_it(object, color); -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - } - - static void paint_ptr(unsigned long ptr, int color) -@@ -798,7 +798,7 @@ static void add_scan_area(unsigned long - if (scan_area_cache) - area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp)); - -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - if (!area) { - pr_warn_once("Cannot allocate a scan area, scanning the full object\n"); - /* mark the object for full scan to avoid false positives */ -@@ -820,7 +820,7 @@ static void add_scan_area(unsigned long - - hlist_add_head(&area->node, &object->area_list); - out_unlock: -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - put_object(object); - } - -@@ -842,9 +842,9 @@ static void object_set_excess_ref(unsign - return; - } - -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - object->excess_ref = excess_ref; -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - put_object(object); - } - -@@ -864,9 +864,9 @@ static void object_no_scan(unsigned long - return; - } - -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - object->flags |= OBJECT_NO_SCAN; -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - put_object(object); - } - -@@ -1026,9 +1026,9 @@ void __ref kmemleak_update_trace(const v - return; - } - -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - object->trace_len = __save_stack_trace(object->trace); -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - - put_object(object); - } -@@ -1233,7 +1233,7 @@ static void scan_block(void *_start, voi - unsigned long flags; - unsigned long untagged_ptr; - -- read_lock_irqsave(&kmemleak_lock, flags); -+ raw_spin_lock_irqsave(&kmemleak_lock, flags); - for (ptr = start; ptr < end; ptr++) { - struct kmemleak_object *object; - unsigned long pointer; -@@ -1268,7 +1268,7 @@ static void scan_block(void *_start, voi - * previously acquired in scan_object(). These locks are - * enclosed by scan_mutex. 
- */ -- spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); -+ raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); - /* only pass surplus references (object already gray) */ - if (color_gray(object)) { - excess_ref = object->excess_ref; -@@ -1277,7 +1277,7 @@ static void scan_block(void *_start, voi - excess_ref = 0; - update_refs(object); - } -- spin_unlock(&object->lock); -+ raw_spin_unlock(&object->lock); - - if (excess_ref) { - object = lookup_object(excess_ref, 0); -@@ -1286,12 +1286,12 @@ static void scan_block(void *_start, voi - if (object == scanned) - /* circular reference, ignore */ - continue; -- spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); -+ raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING); - update_refs(object); -- spin_unlock(&object->lock); -+ raw_spin_unlock(&object->lock); - } - } -- read_unlock_irqrestore(&kmemleak_lock, flags); -+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags); - } - - /* -@@ -1324,7 +1324,7 @@ static void scan_object(struct kmemleak_ - * Once the object->lock is acquired, the corresponding memory block - * cannot be freed (the same lock is acquired in delete_object). - */ -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - if (object->flags & OBJECT_NO_SCAN) - goto out; - if (!(object->flags & OBJECT_ALLOCATED)) -@@ -1344,9 +1344,9 @@ static void scan_object(struct kmemleak_ - if (start >= end) - break; - -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - cond_resched(); -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - } while (object->flags & OBJECT_ALLOCATED); - } else - hlist_for_each_entry(area, &object->area_list, node) -@@ -1354,7 +1354,7 @@ static void scan_object(struct kmemleak_ - (void *)(area->start + area->size), - object); - out: -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - } - - /* -@@ -1407,7 +1407,7 @@ static void kmemleak_scan(void) - /* prepare the kmemleak_object's */ - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - #ifdef DEBUG - /* - * With a few exceptions there should be a maximum of -@@ -1424,7 +1424,7 @@ static void kmemleak_scan(void) - if (color_gray(object) && get_object(object)) - list_add_tail(&object->gray_list, &gray_list); - -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - } - rcu_read_unlock(); - -@@ -1492,14 +1492,14 @@ static void kmemleak_scan(void) - */ - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - if (color_white(object) && (object->flags & OBJECT_ALLOCATED) - && update_checksum(object) && get_object(object)) { - /* color it gray temporarily */ - object->count = object->min_count; - list_add_tail(&object->gray_list, &gray_list); - } -- spin_unlock_irqrestore(&object->lock, flags); -+ raw_spin_unlock_irqrestore(&object->lock, flags); - } - rcu_read_unlock(); - -@@ -1519,7 +1519,7 @@ static void kmemleak_scan(void) - */ - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { -- spin_lock_irqsave(&object->lock, flags); -+ raw_spin_lock_irqsave(&object->lock, flags); - if (unreferenced_object(object) && - !(object->flags & OBJECT_REPORTED)) { - object->flags 
|= OBJECT_REPORTED;
-@@ -1529,7 +1529,7 @@ static void kmemleak_scan(void)
-
- new_leaks++;
- }
-- spin_unlock_irqrestore(&object->lock, flags);
-+ raw_spin_unlock_irqrestore(&object->lock, flags);
- }
- rcu_read_unlock();
-
-@@ -1681,10 +1681,10 @@ static int kmemleak_seq_show(struct seq_
- struct kmemleak_object *object = v;
- unsigned long flags;
-
-- spin_lock_irqsave(&object->lock, flags);
-+ raw_spin_lock_irqsave(&object->lock, flags);
- if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object))
- print_unreferenced(seq, object);
-- spin_unlock_irqrestore(&object->lock, flags);
-+ raw_spin_unlock_irqrestore(&object->lock, flags);
- return 0;
- }
-
-@@ -1714,9 +1714,9 @@ static int dump_str_object_info(const ch
- return -EINVAL;
- }
-
-- spin_lock_irqsave(&object->lock, flags);
-+ raw_spin_lock_irqsave(&object->lock, flags);
- dump_object_info(object);
-- spin_unlock_irqrestore(&object->lock, flags);
-+ raw_spin_unlock_irqrestore(&object->lock, flags);
-
- put_object(object);
- return 0;
-@@ -1735,11 +1735,11 @@ static void kmemleak_clear(void)
-
- rcu_read_lock();
- list_for_each_entry_rcu(object, &object_list, object_list) {
-- spin_lock_irqsave(&object->lock, flags);
-+ raw_spin_lock_irqsave(&object->lock, flags);
- if ((object->flags & OBJECT_REPORTED) &&
- unreferenced_object(object))
- __paint_it(object, KMEMLEAK_GREY);
-- spin_unlock_irqrestore(&object->lock, flags);
-+ raw_spin_unlock_irqrestore(&object->lock, flags);
- }
- rcu_read_unlock();
-
diff --git a/kernel/patches-5.4.x-rt/0067-smp-Use-smp_cond_func_t-as-type-for-the-conditional-.patch b/kernel/patches-5.4.x-rt/0067-smp-Use-smp_cond_func_t-as-type-for-the-conditional-.patch
deleted file mode 100644
index 293d2901d..000000000
--- a/kernel/patches-5.4.x-rt/0067-smp-Use-smp_cond_func_t-as-type-for-the-conditional-.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Thu, 16 Jan 2020 12:00:31 +0100
-Subject: [PATCH] smp: Use smp_cond_func_t as type for the conditional
- function
-
-Use a typedef for the conditional function instead of defining it each time in
-the function prototype.
-
-Signed-off-by: Sebastian Andrzej Siewior
----
- include/linux/smp.h | 14 +++++++-------
- kernel/smp.c | 11 +++++------
- kernel/up.c | 11 +++++------
- 3 files changed, 17 insertions(+), 19 deletions(-)
-
----- a/include/linux/smp.h
-+++ b/include/linux/smp.h
-@@ -15,6 +15,7 @@
- #include
-
- typedef void (*smp_call_func_t)(void *info);
-+typedef bool (*smp_cond_func_t)(int cpu, void *info);
- struct __call_single_data {
- struct llist_node llist;
- smp_call_func_t func;
-@@ -49,13 +50,12 @@ void on_each_cpu_mask(const struct cpuma
- * cond_func returns a positive value. This may include the local
- * processor.
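/*
 * [Editor's aside: what the typedef added above buys, shown on a
 * hypothetical declaration. Spelling the function-pointer type once
 * shortens every prototype that takes a conditional callback.]
 *
 * before:
 *	void demo_call(bool (*cond_func)(int cpu, void *info),
 *		       smp_call_func_t func, void *info, bool wait);
 * after:
 *	void demo_call(smp_cond_func_t cond_func, smp_call_func_t func,
 *		       void *info, bool wait);
 */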
- */ --void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags); -- --void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags, const struct cpumask *mask); -+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags); -+ -+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags, -+ const struct cpumask *mask); - - int smp_call_function_single_async(int cpu, call_single_data_t *csd); - ---- a/kernel/smp.c -+++ b/kernel/smp.c -@@ -680,9 +680,9 @@ EXPORT_SYMBOL(on_each_cpu_mask); - * You must not call this function with disabled interrupts or - * from a hardware interrupt handler or from a bottom half handler. - */ --void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags, const struct cpumask *mask) -+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags, -+ const struct cpumask *mask) - { - cpumask_var_t cpus; - int cpu, ret; -@@ -714,9 +714,8 @@ void on_each_cpu_cond_mask(bool (*cond_f - } - EXPORT_SYMBOL(on_each_cpu_cond_mask); - --void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags) -+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags) - { - on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, - cpu_online_mask); ---- a/kernel/up.c -+++ b/kernel/up.c -@@ -68,9 +68,9 @@ EXPORT_SYMBOL(on_each_cpu_mask); - * Preemption is disabled here to make sure the cond_func is called under the - * same condtions in UP and SMP. - */ --void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags, const struct cpumask *mask) -+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags, -+ const struct cpumask *mask) - { - unsigned long flags; - -@@ -84,9 +84,8 @@ void on_each_cpu_cond_mask(bool (*cond_f - } - EXPORT_SYMBOL(on_each_cpu_cond_mask); - --void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), -- smp_call_func_t func, void *info, bool wait, -- gfp_t gfp_flags) -+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -+ void *info, bool wait, gfp_t gfp_flags) - { - on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL); - } diff --git a/kernel/patches-5.4.x-rt/0068-smp-Add-a-smp_cond_func_t-argument-to-smp_call_funct.patch b/kernel/patches-5.4.x-rt/0068-smp-Add-a-smp_cond_func_t-argument-to-smp_call_funct.patch deleted file mode 100644 index 0dc396cf4..000000000 --- a/kernel/patches-5.4.x-rt/0068-smp-Add-a-smp_cond_func_t-argument-to-smp_call_funct.patch +++ /dev/null @@ -1,139 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Thu, 16 Jan 2020 12:14:38 +0100 -Subject: [PATCH] smp: Add a smp_cond_func_t argument to - smp_call_function_many() - -on_each_cpu_cond_mask() allocates a new CPU mask. The newly allocated -mask is a subset of the provided mask based on the conditional function. -This memory allocation could be avoided by extending -smp_call_function_many() with the conditional function and performing the -remote function call based on the mask and the conditional function. 
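A usage sketch of the conditional-IPI API being reworked here; the demo_* names and the per-CPU flag are invented for illustration. The predicate runs on the calling CPU once per candidate CPU, and only CPUs for which it returns true receive the IPI (the GFP argument is still part of the signature at this point in the series and is dropped by a later patch).

#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/gfp.h>

static DEFINE_PER_CPU(bool, demo_dirty);	/* hypothetical per-CPU state */

static bool demo_is_dirty(int cpu, void *info)
{
	return per_cpu(demo_dirty, cpu);
}

static void demo_flush(void *info)
{
	this_cpu_write(demo_dirty, false);	/* runs on each selected CPU */
}

static void demo_flush_dirty_cpus(void)
{
	on_each_cpu_cond(demo_is_dirty, demo_flush, NULL, true, GFP_KERNEL);
}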
- -Rename smp_call_function_many() to smp_call_function_many_cond() and add -the smp_cond_func_t argument. If smp_cond_func_t is provided then it is -used before invoking the function. -Provide smp_call_function_many() with cond_func set to NULL. -Let on_each_cpu_cond_mask() use smp_call_function_many_cond(). - -Signed-off-by: Sebastian Andrzej Siewior ---- - kernel/smp.c | 77 +++++++++++++++++++++++++++-------------------------------- - 1 file changed, 36 insertions(+), 41 deletions(-) - ---- a/kernel/smp.c -+++ b/kernel/smp.c -@@ -395,22 +395,9 @@ int smp_call_function_any(const struct c - } - EXPORT_SYMBOL_GPL(smp_call_function_any); - --/** -- * smp_call_function_many(): Run a function on a set of other CPUs. -- * @mask: The set of cpus to run on (only runs on online subset). -- * @func: The function to run. This must be fast and non-blocking. -- * @info: An arbitrary pointer to pass to the function. -- * @wait: If true, wait (atomically) until function has completed -- * on other CPUs. -- * -- * If @wait is true, then returns once @func has returned. -- * -- * You must not call this function with disabled interrupts or from a -- * hardware interrupt handler or from a bottom half handler. Preemption -- * must be disabled when calling this function. -- */ --void smp_call_function_many(const struct cpumask *mask, -- smp_call_func_t func, void *info, bool wait) -+static void smp_call_function_many_cond(const struct cpumask *mask, -+ smp_call_func_t func, void *info, -+ bool wait, smp_cond_func_t cond_func) - { - struct call_function_data *cfd; - int cpu, next_cpu, this_cpu = smp_processor_id(); -@@ -448,7 +435,8 @@ void smp_call_function_many(const struct - - /* Fastpath: do that cpu by itself. */ - if (next_cpu >= nr_cpu_ids) { -- smp_call_function_single(cpu, func, info, wait); -+ if (!cond_func || cond_func(cpu, info)) -+ smp_call_function_single(cpu, func, info, wait); - return; - } - -@@ -465,6 +453,9 @@ void smp_call_function_many(const struct - for_each_cpu(cpu, cfd->cpumask) { - call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); - -+ if (cond_func && !cond_func(cpu, info)) -+ continue; -+ - csd_lock(csd); - if (wait) - csd->flags |= CSD_FLAG_SYNCHRONOUS; -@@ -486,6 +477,26 @@ void smp_call_function_many(const struct - } - } - } -+ -+/** -+ * smp_call_function_many(): Run a function on a set of other CPUs. -+ * @mask: The set of cpus to run on (only runs on online subset). -+ * @func: The function to run. This must be fast and non-blocking. -+ * @info: An arbitrary pointer to pass to the function. -+ * @wait: If true, wait (atomically) until function has completed -+ * on other CPUs. -+ * -+ * If @wait is true, then returns once @func has returned. -+ * -+ * You must not call this function with disabled interrupts or from a -+ * hardware interrupt handler or from a bottom half handler. Preemption -+ * must be disabled when calling this function. 
-+ */
-+void smp_call_function_many(const struct cpumask *mask,
-+ smp_call_func_t func, void *info, bool wait)
-+{
-+ smp_call_function_many_cond(mask, func, info, wait, NULL);
-+}
- EXPORT_SYMBOL(smp_call_function_many);
-
- /**
-@@ -684,33 +695,17 @@ void on_each_cpu_cond_mask(smp_cond_func
- void *info, bool wait, gfp_t gfp_flags,
- const struct cpumask *mask)
- {
-- cpumask_var_t cpus;
-- int cpu, ret;
-+ int cpu = get_cpu();
-
-- might_sleep_if(gfpflags_allow_blocking(gfp_flags));
-+ smp_call_function_many_cond(mask, func, info, wait, cond_func);
-+ if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
-+ unsigned long flags;
-
-- if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
-- preempt_disable();
-- for_each_cpu(cpu, mask)
-- if (cond_func(cpu, info))
-- __cpumask_set_cpu(cpu, cpus);
-- on_each_cpu_mask(cpus, func, info, wait);
-- preempt_enable();
-- free_cpumask_var(cpus);
-- } else {
-- /*
-- * No free cpumask, bother. No matter, we'll
-- * just have to IPI them one by one.
-- */
-- preempt_disable();
-- for_each_cpu(cpu, mask)
-- if (cond_func(cpu, info)) {
-- ret = smp_call_function_single(cpu, func,
-- info, wait);
-- WARN_ON_ONCE(ret);
-- }
-- preempt_enable();
-+ local_irq_save(flags);
-+ func(info);
-+ local_irq_restore(flags);
- }
-+ put_cpu();
- }
- EXPORT_SYMBOL(on_each_cpu_cond_mask);
-
diff --git a/kernel/patches-5.4.x-rt/0069-smp-Remove-allocation-mask-from-on_each_cpu_cond.patch b/kernel/patches-5.4.x-rt/0069-smp-Remove-allocation-mask-from-on_each_cpu_cond.patch
deleted file mode 100644
index 1a08853dd..000000000
--- a/kernel/patches-5.4.x-rt/0069-smp-Remove-allocation-mask-from-on_each_cpu_cond.patch
+++ /dev/null
@@ -1,127 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Thu, 16 Jan 2020 13:13:41 +0100
-Subject: [PATCH] smp: Remove allocation mask from on_each_cpu_cond.*()
-
-The allocation mask is no longer used by on_each_cpu_cond() and
-on_each_cpu_cond_mask() and can be removed.
-
-Signed-off-by: Sebastian Andrzej Siewior
----
- arch/x86/mm/tlb.c | 2 +-
- fs/buffer.c | 2 +-
- include/linux/smp.h | 5 ++---
- kernel/smp.c | 13 +++----------
- kernel/up.c | 7 +++----
- mm/slub.c | 2 +-
- 6 files changed, 11 insertions(+), 20 deletions(-)
-
----- a/arch/x86/mm/tlb.c
-+++ b/arch/x86/mm/tlb.c
-@@ -708,7 +708,7 @@ void native_flush_tlb_others(const struc
- (void *)info, 1);
- else
- on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
-- (void *)info, 1, GFP_ATOMIC, cpumask);
-+ (void *)info, 1, cpumask);
- }
-
- /*
---- a/fs/buffer.c
-+++ b/fs/buffer.c
-@@ -1387,7 +1387,7 @@ static bool has_bh_in_lru(int cpu, void
-
- void invalidate_bh_lrus(void)
- {
-- on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
-+ on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
- }
- EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
-
----- a/include/linux/smp.h
-+++ b/include/linux/smp.h
-@@ -51,11 +51,10 @@ void on_each_cpu_mask(const struct cpuma
- * processor.
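/*
 * [Editor's aside: after this patch, the hypothetical call from the
 * earlier sketch simply loses its GFP argument, since no cpumask is
 * allocated on this path any more:
 *
 *	on_each_cpu_cond(demo_is_dirty, demo_flush, NULL, true);
 *
 * The callers touched in the hunks below change the same way.]
 */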
- */ - void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags); -+ void *info, bool wait); - - void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags, -- const struct cpumask *mask); -+ void *info, bool wait, const struct cpumask *mask); - - int smp_call_function_single_async(int cpu, call_single_data_t *csd); - ---- a/kernel/smp.c -+++ b/kernel/smp.c -@@ -679,11 +679,6 @@ EXPORT_SYMBOL(on_each_cpu_mask); - * @info: An arbitrary pointer to pass to both functions. - * @wait: If true, wait (atomically) until function has - * completed on other CPUs. -- * @gfp_flags: GFP flags to use when allocating the cpumask -- * used internally by the function. -- * -- * The function might sleep if the GFP flags indicates a non -- * atomic allocation is allowed. - * - * Preemption is disabled to protect against CPUs going offline but not online. - * CPUs going online during the call will not be seen or sent an IPI. -@@ -692,8 +687,7 @@ EXPORT_SYMBOL(on_each_cpu_mask); - * from a hardware interrupt handler or from a bottom half handler. - */ - void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags, -- const struct cpumask *mask) -+ void *info, bool wait, const struct cpumask *mask) - { - int cpu = get_cpu(); - -@@ -710,10 +704,9 @@ void on_each_cpu_cond_mask(smp_cond_func - EXPORT_SYMBOL(on_each_cpu_cond_mask); - - void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags) -+ void *info, bool wait) - { -- on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, -- cpu_online_mask); -+ on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask); - } - EXPORT_SYMBOL(on_each_cpu_cond); - ---- a/kernel/up.c -+++ b/kernel/up.c -@@ -69,8 +69,7 @@ EXPORT_SYMBOL(on_each_cpu_mask); - * same condtions in UP and SMP. - */ - void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags, -- const struct cpumask *mask) -+ void *info, bool wait, const struct cpumask *mask) - { - unsigned long flags; - -@@ -85,9 +84,9 @@ void on_each_cpu_cond_mask(smp_cond_func - EXPORT_SYMBOL(on_each_cpu_cond_mask); - - void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func, -- void *info, bool wait, gfp_t gfp_flags) -+ void *info, bool wait) - { -- on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL); -+ on_each_cpu_cond_mask(cond_func, func, info, wait, NULL); - } - EXPORT_SYMBOL(on_each_cpu_cond); - ---- a/mm/slub.c -+++ b/mm/slub.c -@@ -2338,7 +2338,7 @@ static bool has_cpu_slab(int cpu, void * - - static void flush_all(struct kmem_cache *s) - { -- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); -+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1); - } - - /* diff --git a/kernel/patches-5.4.x-rt/0070-drm-vmwgfx-Drop-preempt_disable-in-vmw_fifo_ping_hos.patch b/kernel/patches-5.4.x-rt/0070-drm-vmwgfx-Drop-preempt_disable-in-vmw_fifo_ping_hos.patch deleted file mode 100644 index 089cd86b9..000000000 --- a/kernel/patches-5.4.x-rt/0070-drm-vmwgfx-Drop-preempt_disable-in-vmw_fifo_ping_hos.patch +++ /dev/null @@ -1,35 +0,0 @@ -From: Sebastian Andrzej Siewior -Date: Fri, 21 Feb 2020 18:57:11 +0100 -Subject: [PATCH] drm/vmwgfx: Drop preempt_disable() in - vmw_fifo_ping_host() - -vmw_fifo_ping_host() disables preemption around a test and a register -write via vmw_write(). 
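/*
 * [Editor's aside, not patch content: the pattern the removal relies on,
 * with illustrative names. cmpxchg() is atomic by itself, so the
 * test-and-set needs no preempt_disable()/preempt_enable() bracket:]
 */
static void sketch_kick_device(void);	/* hypothetical register write */

static void sketch_ping(u32 *busy_flag)
{
	if (cmpxchg(busy_flag, 0, 1) == 0)
		sketch_kick_device();	/* only the race winner pings */
}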
The write function acquires a spinlock_t typed
-lock which is not allowed in a preempt_disable()ed section on
-PREEMPT_RT. This has been reported in the bugzilla.
-
-It has been explained by Thomas Hellstrom that this preempt_disable()ed
-section is not required for correctness.
-
-Remove the preempt_disable() section.
-
-Link: https://bugzilla.kernel.org/show_bug.cgi?id=206591
-Link: https://lkml.kernel.org/r/0b5e1c65d89951de993deab06d1d197b40fd67aa.camel@vmware.com
-Signed-off-by: Sebastian Andrzej Siewior
----
- drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 2 --
- 1 file changed, 2 deletions(-)
-
----- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
-+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
-@@ -169,10 +169,8 @@ void vmw_fifo_ping_host(struct vmw_priva
- {
- u32 *fifo_mem = dev_priv->mmio_virt;
-
-- preempt_disable();
- if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
- vmw_write(dev_priv, SVGA_REG_SYNC, reason);
-- preempt_enable();
- }
-
- void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
diff --git a/kernel/patches-5.4.x-rt/0071-mm-compaction-Really-limit-compact_unevictable_allow.patch b/kernel/patches-5.4.x-rt/0071-mm-compaction-Really-limit-compact_unevictable_allow.patch
deleted file mode 100644
index b34687eac..000000000
--- a/kernel/patches-5.4.x-rt/0071-mm-compaction-Really-limit-compact_unevictable_allow.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Tue, 3 Mar 2020 13:43:25 +0100
-Subject: [PATCH] =?UTF-8?q?mm/compaction:=20Really=20limit=20compact=5Fune?=
- =?UTF-8?q?victable=5Fallowed=20to=200=E2=80=A61?=
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-The proc file `compact_unevictable_allowed' should allow 0 and 1 only,
-the `extra*' attributes have been set properly but without
-proc_dointvec_minmax() as the `proc_handler' the limit will not be
-enforced.
-
-Use proc_dointvec_minmax() as the `proc_handler' to enforce the valid
-specified range.
-
-Signed-off-by: Sebastian Andrzej Siewior
----
- kernel/sysctl.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
----- a/kernel/sysctl.c
-+++ b/kernel/sysctl.c
-@@ -1493,7 +1493,7 @@ static struct ctl_table vm_table[] = {
- .data = &sysctl_compact_unevictable_allowed,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = proc_dointvec,
-+ .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
diff --git a/kernel/patches-5.4.x-rt/0072-mm-compaction-Disable-compact_unevictable_allowed-on.patch b/kernel/patches-5.4.x-rt/0072-mm-compaction-Disable-compact_unevictable_allowed-on.patch
deleted file mode 100644
index 838166248..000000000
--- a/kernel/patches-5.4.x-rt/0072-mm-compaction-Disable-compact_unevictable_allowed-on.patch
+++ /dev/null
@@ -1,102 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Fri, 8 Nov 2019 12:55:47 +0100
-Subject: [PATCH] mm/compaction: Disable compact_unevictable_allowed on RT
-
-Since commit
- 5bbe3547aa3ba ("mm: allow compaction of unevictable pages")
-
-it is allowed to examine mlocked pages and compact them by default.
-On -RT even minor pagefaults are problematic because it may take a few
-100us to resolve them and until then the task is blocked.
-
-Make compact_unevictable_allowed = 0 default and issue a warning on RT
-if it is changed.
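A minimal sketch of the sysctl idiom involved, on a hypothetical knob: .extra1/.extra2 only take effect if the proc_handler consults them, which proc_dointvec() does not and proc_dointvec_minmax() does.

#include <linux/sysctl.h>

static int demo_knob;	/* hypothetical 0/1 tunable */

static struct ctl_table demo_table[] = {
	{
		.procname	= "demo_knob",
		.data		= &demo_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,	/* honours extra1/extra2 */
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{ }
};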
-
-Link: https://lore.kernel.org/linux-mm/20190710144138.qyn4tuttdq6h7kqx@linutronix.de/
-Acked-by: Mel Gorman
-Acked-by: Vlastimil Babka
-Signed-off-by: Sebastian Andrzej Siewior
----
- Documentation/admin-guide/sysctl/vm.rst | 3 +++
- kernel/sysctl.c | 29 ++++++++++++++++++++++++++++-
- mm/compaction.c | 4 ++++
- 3 files changed, 35 insertions(+), 1 deletion(-)
-
----- a/Documentation/admin-guide/sysctl/vm.rst
-+++ b/Documentation/admin-guide/sysctl/vm.rst
-@@ -128,6 +128,9 @@ allowed to examine the unevictable lru (
- This should be used on systems where stalls for minor page faults are an
- acceptable trade for large contiguous free memory. Set to 0 to prevent
- compaction from moving pages that are unevictable. Default value is 1.
-+On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
-+to compaction, which would block the task from becoming active until the fault
-+is resolved.
-
-
- dirty_background_bytes
---- a/kernel/sysctl.c
-+++ b/kernel/sysctl.c
-@@ -212,6 +212,11 @@ static int proc_do_cad_pid(struct ctl_ta
- void __user *buffer, size_t *lenp, loff_t *ppos);
- static int proc_taint(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
-+#ifdef CONFIG_COMPACTION
-+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
-+ int write, void __user *buffer,
-+ size_t *lenp, loff_t *ppos);
-+#endif
- #endif
-
- #ifdef CONFIG_PRINTK
-@@ -1493,7 +1498,7 @@ static struct ctl_table vm_table[] = {
- .data = &sysctl_compact_unevictable_allowed,
- .maxlen = sizeof(int),
- .mode = 0644,
-- .proc_handler = proc_dointvec_minmax,
-+ .proc_handler = proc_dointvec_minmax_warn_RT_change,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
-@@ -2581,6 +2586,28 @@ int proc_dointvec(struct ctl_table *tabl
- return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
- }
-
-+#ifdef CONFIG_COMPACTION
-+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
-+ int write, void __user *buffer,
-+ size_t *lenp, loff_t *ppos)
-+{
-+ int ret, old;
-+
-+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
-+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-+
-+ old = *(int *)table->data;
-+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-+ if (ret)
-+ return ret;
-+ if (old != *(int *)table->data)
-+ pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
-+ table->procname, current->comm,
-+ task_pid_nr(current));
-+ return ret;
-+}
-+#endif
-+
- /**
- * proc_douintvec - read a vector of unsigned integers
- * @table: the sysctl table
---- a/mm/compaction.c
-+++ b/mm/compaction.c
-@@ -1590,7 +1590,11 @@ typedef enum {
- * Allow userspace to control policy on scanning the unevictable LRU for
- * compactable pages.
- */
-+#ifdef CONFIG_PREEMPT_RT
-+int sysctl_compact_unevictable_allowed __read_mostly = 0;
-+#else
- int sysctl_compact_unevictable_allowed __read_mostly = 1;
-+#endif
-
- static inline void
- update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
diff --git a/kernel/patches-5.4.x-rt/0073-Use-CONFIG_PREEMPTION.patch b/kernel/patches-5.4.x-rt/0073-Use-CONFIG_PREEMPTION.patch
deleted file mode 100644
index 9226e5a3e..000000000
--- a/kernel/patches-5.4.x-rt/0073-Use-CONFIG_PREEMPTION.patch
+++ /dev/null
@@ -1,1521 +0,0 @@
-From: Sebastian Andrzej Siewior
-Date: Fri, 26 Jul 2019 11:30:49 +0200
-Subject: [PATCH] Use CONFIG_PREEMPTION
-
-This is an all-in-one patch of the current `PREEMPTION' branch.
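Most hunks in this patch share one mechanical shape, sketched here on an invented function: CONFIG_PREEMPTION is the umbrella symbol selected by both CONFIG_PREEMPT and CONFIG_PREEMPT_RT, so code that must handle a preemptible kernel tests the umbrella rather than CONFIG_PREEMPT alone.

/* before: a CONFIG_PREEMPT_RT=y (CONFIG_PREEMPT=n) kernel skips this path */
#ifdef CONFIG_PREEMPT
	demo_preemptible_fixup();	/* hypothetical */
#endif

/* after: taken for CONFIG_PREEMPT=y and CONFIG_PREEMPT_RT=y alike */
#ifdef CONFIG_PREEMPTION
	demo_preemptible_fixup();
#endif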
- -Signed-off-by: Sebastian Andrzej Siewior ---- - Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html | 8 - - Documentation/RCU/Design/Requirements/Requirements.html | 24 ++-- - Documentation/RCU/checklist.txt | 4 - Documentation/RCU/rcubarrier.txt | 8 - - Documentation/RCU/stallwarn.txt | 4 - Documentation/RCU/whatisRCU.txt | 7 - - Documentation/trace/ftrace-uses.rst | 2 - arch/arc/kernel/entry.S | 6 - - arch/arm/include/asm/switch_to.h | 2 - arch/arm/kernel/entry-armv.S | 4 - arch/arm/kernel/traps.c | 2 - arch/arm/mm/cache-v7.S | 4 - arch/arm/mm/cache-v7m.S | 4 - arch/arm64/Kconfig | 52 +++++----- - arch/arm64/crypto/sha256-glue.c | 2 - arch/arm64/include/asm/assembler.h | 6 - - arch/arm64/include/asm/preempt.h | 4 - arch/arm64/kernel/entry.S | 2 - arch/arm64/kernel/traps.c | 3 - arch/c6x/kernel/entry.S | 8 - - arch/csky/kernel/entry.S | 4 - arch/h8300/kernel/entry.S | 6 - - arch/hexagon/kernel/vm_entry.S | 6 - - arch/ia64/kernel/entry.S | 12 +- - arch/ia64/kernel/kprobes.c | 2 - arch/m68k/coldfire/entry.S | 2 - arch/microblaze/kernel/entry.S | 2 - arch/mips/include/asm/asmmacro.h | 4 - arch/mips/kernel/entry.S | 6 - - arch/nds32/Kconfig | 2 - arch/nds32/kernel/ex-exit.S | 4 - arch/nios2/kernel/entry.S | 2 - arch/parisc/Kconfig | 2 - arch/parisc/kernel/entry.S | 10 - - arch/powerpc/Kconfig | 2 - arch/powerpc/kernel/entry_32.S | 4 - arch/powerpc/kernel/entry_64.S | 4 - arch/powerpc/kernel/traps.c | 7 + - arch/riscv/kernel/entry.S | 4 - arch/s390/Kconfig | 2 - arch/s390/include/asm/preempt.h | 4 - arch/s390/kernel/dumpstack.c | 2 - arch/s390/kernel/entry.S | 2 - arch/sh/Kconfig | 2 - arch/sh/kernel/cpu/sh5/entry.S | 4 - arch/sh/kernel/entry-common.S | 4 - arch/sparc/Kconfig | 2 - arch/sparc/kernel/rtrap_64.S | 2 - arch/xtensa/kernel/entry.S | 2 - arch/xtensa/kernel/traps.c | 7 - - drivers/gpu/drm/Kconfig | 2 - drivers/media/platform/Kconfig | 2 - drivers/video/backlight/Kconfig | 4 - drivers/xen/preempt.c | 4 - fs/btrfs/volumes.h | 2 - fs/stack.c | 6 - - include/linux/fs.h | 4 - include/linux/genhd.h | 6 - - include/linux/rcupdate.h | 4 - include/xen/xen-ops.h | 4 - kernel/Kconfig.locks | 12 +- - kernel/rcu/Kconfig | 4 - kernel/rcu/rcutorture.c | 2 - kernel/rcu/srcutiny.c | 2 - kernel/rcu/tree.c | 4 - kernel/rcu/tree_exp.h | 2 - kernel/rcu/tree_plugin.h | 4 - kernel/trace/trace.c | 2 - kernel/workqueue.c | 2 - lib/Kconfig.debug | 2 - mm/memory.c | 2 - mm/slub.c | 12 +- - net/core/dev.c | 2 - 73 files changed, 191 insertions(+), 173 deletions(-) - ---- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html -+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html -@@ -56,8 +56,8 @@ sections. - RCU-preempt Expedited Grace Periods - -
--CONFIG_PREEMPT=y kernels implement RCU-preempt. --The overall flow of the handling of a given CPU by an RCU-preempt -+CONFIG_PREEMPT=y and CONFIG_PREEMPT_RT=y kernels implement -+RCU-preempt. The overall flow of the handling of a given CPU by an RCU-preempt - expedited grace period is shown in the following diagram: - -
ExpRCUFlow.svg -@@ -140,8 +140,8 @@ or offline, among other things. - RCU-sched Expedited Grace Periods - -
--CONFIG_PREEMPT=n kernels implement RCU-sched. --The overall flow of the handling of a given CPU by an RCU-sched -+CONFIG_PREEMPT=n and CONFIG_PREEMPT_RT=n kernels implement -+RCU-sched. The overall flow of the handling of a given CPU by an RCU-sched - expedited grace period is shown in the following diagram: - -
ExpSchedFlow.svg ---- a/Documentation/RCU/Design/Requirements/Requirements.html -+++ b/Documentation/RCU/Design/Requirements/Requirements.html -@@ -106,7 +106,7 @@ big RCU read-side critical section. - Production-quality implementations of rcu_read_lock() and - rcu_read_unlock() are extremely lightweight, and in - fact have exactly zero overhead in Linux kernels built for production --use with CONFIG_PREEMPT=n. -+use with CONFIG_PREEMPTION=n. - -
- This guarantee allows ordering to be enforced with extremely low -@@ -1499,7 +1499,7 @@ costs have plummeted. - However, as I learned from Matt Mackall's - bloatwatch - efforts, memory footprint is critically important on single-CPU systems with --non-preemptible (CONFIG_PREEMPT=n) kernels, and thus -+non-preemptible (CONFIG_PREEMPTION=n) kernels, and thus - tiny RCU - was born. - Josh Triplett has since taken over the small-memory banner with his -@@ -1887,7 +1887,7 @@ constructs, there are limitations. -
- Implementations of RCU for which rcu_read_lock() - and rcu_read_unlock() generate no code, such as --Linux-kernel RCU when CONFIG_PREEMPT=n, can be -+Linux-kernel RCU when CONFIG_PREEMPTION=n, can be - nested arbitrarily deeply. - After all, there is no overhead. - Except that if all these instances of rcu_read_lock() -@@ -2229,7 +2229,7 @@ be a no-op. -
- However, once the scheduler has spawned its first kthread, this early - boot trick fails for synchronize_rcu() (as well as for --synchronize_rcu_expedited()) in CONFIG_PREEMPT=y -+synchronize_rcu_expedited()) in CONFIG_PREEMPTION=y - kernels. - The reason is that an RCU read-side critical section might be preempted, - which means that a subsequent synchronize_rcu() really does have -@@ -2568,7 +2568,7 @@ The compiler must not be permitted to tr - -
- If the compiler did make this transformation in a --CONFIG_PREEMPT=n kernel build, and if get_user() did -+CONFIG_PREEMPTION=n kernel build, and if get_user() did - page fault, the result would be a quiescent state in the middle - of an RCU read-side critical section. - This misplaced quiescent state could result in line 4 being -@@ -2906,7 +2906,7 @@ in conjunction with the - The real-time-latency response requirements are such that the - traditional approach of disabling preemption across RCU - read-side critical sections is inappropriate. --Kernels built with CONFIG_PREEMPT=y therefore -+Kernels built with CONFIG_PREEMPTION=y therefore - use an RCU implementation that allows RCU read-side critical - sections to be preempted. - This requirement made its presence known after users made it -@@ -3064,7 +3064,7 @@ includes - rcu_barrier_bh(), and - rcu_read_lock_bh_held(). - However, the update-side APIs are now simple wrappers for other RCU --flavors, namely RCU-sched in CONFIG_PREEMPT=n kernels and RCU-preempt -+flavors, namely RCU-sched in CONFIG_PREEMPTION=n kernels and RCU-preempt - otherwise. - -
Sched Flavor (Historical)
-@@ -3088,12 +3088,12 @@ of an RCU read-side critical section can
- Therefore, RCU-sched was created, which follows “classic”
- RCU in that an RCU-sched grace period waits for pre-existing
- interrupt and NMI handlers.
--In kernels built with CONFIG_PREEMPT=n, the RCU and RCU-sched
-+In kernels built with CONFIG_PREEMPTION=n, the RCU and RCU-sched
- APIs have identical implementations, while kernels built with
--CONFIG_PREEMPT=y provide a separate implementation for each.
-+CONFIG_PREEMPTION=y provide a separate implementation for each.
-
-
--Note well that in CONFIG_PREEMPT=y kernels, -+Note well that in CONFIG_PREEMPTION=y kernels, - rcu_read_lock_sched() and rcu_read_unlock_sched() - disable and re-enable preemption, respectively. - This means that if there was a preemption attempt during the -@@ -3302,12 +3302,12 @@ The tasks-RCU API is quite compact, cons - call_rcu_tasks(), - synchronize_rcu_tasks(), and - rcu_barrier_tasks(). --In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, -+In CONFIG_PREEMPTION=n kernels, trampolines cannot be preempted, - so these APIs map to - call_rcu(), - synchronize_rcu(), and - rcu_barrier(), respectively. --In CONFIG_PREEMPT=y kernels, trampolines can be preempted, -+In CONFIG_PREEMPTION=y kernels, trampolines can be preempted, - and these three APIs are therefore implemented by separate functions - that check for voluntary context switches. - ---- a/Documentation/RCU/checklist.txt -+++ b/Documentation/RCU/checklist.txt -@@ -210,8 +210,8 @@ over a rather long period of time, but i - the rest of the system. - - 7. As of v4.20, a given kernel implements only one RCU flavor, -- which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y. -- If the updater uses call_rcu() or synchronize_rcu(), -+ which is RCU-sched for PREEMPTION=n and RCU-preempt for -+ PREEMPTION=y. If the updater uses call_rcu() or synchronize_rcu(), - then the corresponding readers my use rcu_read_lock() and - rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), - or any pair of primitives that disables and re-enables preemption, ---- a/Documentation/RCU/rcubarrier.txt -+++ b/Documentation/RCU/rcubarrier.txt -@@ -6,8 +6,8 @@ RCU (read-copy update) is a synchronizat - of as a replacement for read-writer locking (among other things), but with - very low-overhead readers that are immune to deadlock, priority inversion, - and unbounded latency. RCU read-side critical sections are delimited --by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT --kernels, generate no code whatsoever. -+by rcu_read_lock() and rcu_read_unlock(), which, in -+non-CONFIG_PREEMPTION kernels, generate no code whatsoever. - - This means that RCU writers are unaware of the presence of concurrent - readers, so that RCU updates to shared data must be undertaken quite -@@ -303,10 +303,10 @@ Answer: This cannot happen. The reason i - to smp_call_function() and further to smp_call_function_on_cpu(), - causing this latter to spin until the cross-CPU invocation of - rcu_barrier_func() has completed. This by itself would prevent -- a grace period from completing on non-CONFIG_PREEMPT kernels, -+ a grace period from completing on non-CONFIG_PREEMPTION kernels, - since each CPU must undergo a context switch (or other quiescent - state) before the grace period can complete. However, this is -- of no use in CONFIG_PREEMPT kernels. -+ of no use in CONFIG_PREEMPTION kernels. - - Therefore, on_each_cpu() disables preemption across its call - to smp_call_function() and also across the local call to ---- a/Documentation/RCU/stallwarn.txt -+++ b/Documentation/RCU/stallwarn.txt -@@ -20,7 +20,7 @@ o A CPU looping with preemption disabled - - o A CPU looping with bottom halves disabled. - --o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel -+o For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the kernel - without invoking schedule(). If the looping in the kernel is - really expected and desirable behavior, you might need to add - some calls to cond_resched(). 
-@@ -39,7 +39,7 @@ o Anything that prevents RCU's grace-per - result in the "rcu_.*kthread starved for" console-log message, - which will include additional debugging information. - --o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might -+o A CPU-bound real-time task in a CONFIG_PREEMPTION kernel, which might - happen to preempt a low-priority task in the middle of an RCU - read-side critical section. This is especially damaging if - that low-priority task is not permitted to run on any other CPU, ---- a/Documentation/RCU/whatisRCU.txt -+++ b/Documentation/RCU/whatisRCU.txt -@@ -648,9 +648,10 @@ Quick Quiz #1: Why is this argument naiv - - This section presents a "toy" RCU implementation that is based on - "classic RCU". It is also short on performance (but only for updates) and --on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT --kernels. The definitions of rcu_dereference() and rcu_assign_pointer() --are the same as those shown in the preceding section, so they are omitted. -+on features such as hotplug CPU and the ability to run in -+CONFIG_PREEMPTION kernels. The definitions of rcu_dereference() and -+rcu_assign_pointer() are the same as those shown in the preceding -+section, so they are omitted. - - void rcu_read_lock(void) { } - ---- a/Documentation/trace/ftrace-uses.rst -+++ b/Documentation/trace/ftrace-uses.rst -@@ -146,7 +146,7 @@ FTRACE_OPS_FL_RECURSION_SAFE - itself or any nested functions that those functions call. - - If this flag is set, it is possible that the callback will also -- be called with preemption enabled (when CONFIG_PREEMPT is set), -+ be called with preemption enabled (when CONFIG_PREEMPTION is set), - but this is not guaranteed. - - FTRACE_OPS_FL_IPMODIFY ---- a/arch/arc/kernel/entry.S -+++ b/arch/arc/kernel/entry.S -@@ -337,11 +337,11 @@ ENTRY(ret_from_exception) - resume_kernel_mode: - - ; Disable Interrupts from this point on -- ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() -- ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe -+ ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq() -+ ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe - IRQ_DISABLE r9 - --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - - ; Can't preempt if preemption disabled - GET_CURR_THR_INFO_FROM_SP r10 ---- a/arch/arm/include/asm/switch_to.h -+++ b/arch/arm/include/asm/switch_to.h -@@ -10,7 +10,7 @@ - * to ensure that the maintenance completes in case we migrate to another - * CPU. 
- */ --#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -+#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) - #define __complete_pending_tlbi() dsb(ish) - #else - #define __complete_pending_tlbi() ---- a/arch/arm/kernel/entry-armv.S -+++ b/arch/arm/kernel/entry-armv.S -@@ -211,7 +211,7 @@ ENDPROC(__dabt_svc) - svc_entry - irq_handler - --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags - teq r8, #0 @ if preempt count != 0 -@@ -226,7 +226,7 @@ ENDPROC(__irq_svc) - - .ltorg - --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - svc_preempt: - mov r8, lr - 1: bl preempt_schedule_irq @ irq en/disable is done inside ---- a/arch/arm/kernel/traps.c -+++ b/arch/arm/kernel/traps.c -@@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, - - #ifdef CONFIG_PREEMPT - #define S_PREEMPT " PREEMPT" -+#elif defined(CONFIG_PREEMPT_RT) -+#define S_PREEMPT " PREEMPT_RT" - #else - #define S_PREEMPT "" - #endif ---- a/arch/arm/mm/cache-v7.S -+++ b/arch/arm/mm/cache-v7.S -@@ -135,13 +135,13 @@ ENTRY(v7_flush_dcache_all) - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic - #endif - mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr - isb @ isb to sych the new cssr&csidr - mrc p15, 1, r1, c0, c0, 0 @ read the new csidr --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - restore_irqs_notrace r9 - #endif - and r2, r1, #7 @ extract the length of the cache lines ---- a/arch/arm/mm/cache-v7m.S -+++ b/arch/arm/mm/cache-v7m.S -@@ -183,13 +183,13 @@ ENTRY(v7m_flush_dcache_all) - and r1, r1, #7 @ mask of the bits for current cache only - cmp r1, #2 @ see what cache we have at this level - blt skip @ skip if no cache, or just i-cache --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic - #endif - write_csselr r10, r1 @ set current cache level - isb @ isb to sych the new cssr&csidr - read_ccsidr r1 @ read the new csidr --#ifdef CONFIG_PREEMPT -+#ifdef CONFIG_PREEMPTION - restore_irqs_notrace r9 - #endif - and r2, r1, #7 @ extract the length of the cache lines ---- a/arch/arm64/Kconfig -+++ b/arch/arm64/Kconfig -@@ -35,32 +35,32 @@ config ARM64 - select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select ARCH_HAVE_NMI_SAFE_CMPXCHG -- select ARCH_INLINE_READ_LOCK if !PREEMPT -- select ARCH_INLINE_READ_LOCK_BH if !PREEMPT -- select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT -- select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT -- select ARCH_INLINE_READ_UNLOCK if !PREEMPT -- select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT -- select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT -- select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT -- select ARCH_INLINE_WRITE_LOCK if !PREEMPT -- select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT -- select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT -- select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT -- select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT -- select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT -- select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT -- select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT -- select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT -- select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT -- select 
ARCH_INLINE_SPIN_LOCK if !PREEMPT -- select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT -- select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT -- select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT -- select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT -- select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT -- select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT -- select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT -+ select ARCH_INLINE_READ_LOCK if !PREEMPTION -+ select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION -+ select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION -+ select ARCH_INLINE_READ_UNLOCK if !PREEMPTION -+ select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION -+ select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION -+ select ARCH_INLINE_WRITE_LOCK if !PREEMPTION -+ select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION -+ select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION -+ select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION -+ select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION -+ select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION -+ select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION -+ select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION -+ select ARCH_INLINE_SPIN_LOCK if !PREEMPTION -+ select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION -+ select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION -+ select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION -+ select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION -+ select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION -+ select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION - select ARCH_KEEP_MEMBLOCK - select ARCH_USE_CMPXCHG_LOCKREF - select ARCH_USE_QUEUED_RWLOCKS ---- a/arch/arm64/crypto/sha256-glue.c -+++ b/arch/arm64/crypto/sha256-glue.c -@@ -97,7 +97,7 @@ static int sha256_update_neon(struct sha - * input when running on a preemptible kernel, but process the - * data block by block instead. - */ -- if (IS_ENABLED(CONFIG_PREEMPT) && -+ if (IS_ENABLED(CONFIG_PREEMPTION) && - chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE) - chunk = SHA256_BLOCK_SIZE - - sctx->count % SHA256_BLOCK_SIZE; ---- a/arch/arm64/include/asm/assembler.h -+++ b/arch/arm64/include/asm/assembler.h -@@ -699,8 +699,8 @@ USER(\label, ic ivau, \tmp2) // invali - * where