mirror of
https://github.com/linuxkit/linuxkit.git
synced 2026-05-08 16:42:32 +00:00
@@ -0,0 +1,77 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 30 Oct 2020 13:59:06 +0100
|
||||
Subject: [PATCH] highmem: Don't disable preemption on RT in kmap_atomic()
|
||||
|
||||
Disabling preemption makes it impossible to acquire sleeping locks within
|
||||
a kmap_atomic() section.
|
||||
For PREEMPT_RT it is sufficient to disable migration.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/highmem-internal.h | 27 ++++++++++++++++++++++-----
|
||||
1 file changed, 22 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/linux/highmem-internal.h
|
||||
+++ b/include/linux/highmem-internal.h
|
||||
@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *
|
||||
|
||||
static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
|
||||
{
|
||||
- preempt_disable();
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
+ migrate_disable();
|
||||
+ else
|
||||
+ preempt_disable();
|
||||
+
|
||||
pagefault_disable();
|
||||
return __kmap_local_page_prot(page, prot);
|
||||
}
|
||||
@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct p
|
||||
|
||||
static inline void *kmap_atomic_pfn(unsigned long pfn)
|
||||
{
|
||||
- preempt_disable();
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
+ migrate_disable();
|
||||
+ else
|
||||
+ preempt_disable();
|
||||
+
|
||||
pagefault_disable();
|
||||
return __kmap_local_pfn_prot(pfn, kmap_prot);
|
||||
}
|
||||
@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void
|
||||
{
|
||||
kunmap_local_indexed(addr);
|
||||
pagefault_enable();
|
||||
- preempt_enable();
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
+ migrate_enable();
|
||||
+ else
|
||||
+ preempt_enable();
|
||||
}
|
||||
|
||||
unsigned int __nr_free_highpages(void);
|
||||
@@ -184,7 +195,10 @@ static inline void __kunmap_local(void *
|
||||
|
||||
static inline void *kmap_atomic(struct page *page)
|
||||
{
|
||||
- preempt_disable();
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
+ migrate_disable();
|
||||
+ else
|
||||
+ preempt_disable();
|
||||
pagefault_disable();
|
||||
return page_address(page);
|
||||
}
|
||||
@@ -205,7 +219,10 @@ static inline void __kunmap_atomic(void
|
||||
kunmap_flush_on_unmap(addr);
|
||||
#endif
|
||||
pagefault_enable();
|
||||
- preempt_enable();
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
+ migrate_enable();
|
||||
+ else
|
||||
+ preempt_enable();
|
||||
}
|
||||
|
||||
static inline unsigned int nr_free_highpages(void) { return 0; }
|
||||
@@ -0,0 +1,55 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 6 Dec 2020 22:40:07 +0100
|
||||
Subject: [PATCH] timers: Move clearing of base::running_timer under base::lock
|
||||
|
||||
syzbot reported KCSAN data races vs. timer_base::running_timer being set to
|
||||
NULL without holding base::lock in expire_timers().
|
||||
|
||||
This looks innocent and most reads are clearly not problematic but for a
|
||||
non-RT kernel it's completely irrelevant whether the store happens before
|
||||
or after taking the lock. For an RT kernel moving the store under the lock
|
||||
requires an extra unlock/lock pair in the case that there is a waiter for
|
||||
the timer. But that's not the end of the world and definitely not worth the
|
||||
trouble of adding boatloads of comments and annotations to the code. Famous
|
||||
last words...
|
||||
|
||||
Reported-by: syzbot+aa7c2385d46c5eba0b89@syzkaller.appspotmail.com
|
||||
Reported-by: syzbot+abea4558531bae1ba9fe@syzkaller.appspotmail.com
|
||||
Link: https://lkml.kernel.org/r/87lfea7gw8.fsf@nanos.tec.linutronix.de
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
---
|
||||
kernel/time/timer.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/time/timer.c
|
||||
+++ b/kernel/time/timer.c
|
||||
@@ -1263,8 +1263,10 @@ static inline void timer_base_unlock_exp
|
||||
static void timer_sync_wait_running(struct timer_base *base)
|
||||
{
|
||||
if (atomic_read(&base->timer_waiters)) {
|
||||
+ raw_spin_unlock_irq(&base->lock);
|
||||
spin_unlock(&base->expiry_lock);
|
||||
spin_lock(&base->expiry_lock);
|
||||
+ raw_spin_lock_irq(&base->lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1455,14 +1457,14 @@ static void expire_timers(struct timer_b
|
||||
if (timer->flags & TIMER_IRQSAFE) {
|
||||
raw_spin_unlock(&base->lock);
|
||||
call_timer_fn(timer, fn, baseclk);
|
||||
- base->running_timer = NULL;
|
||||
raw_spin_lock(&base->lock);
|
||||
+ base->running_timer = NULL;
|
||||
} else {
|
||||
raw_spin_unlock_irq(&base->lock);
|
||||
call_timer_fn(timer, fn, baseclk);
|
||||
+ raw_spin_lock_irq(&base->lock);
|
||||
base->running_timer = NULL;
|
||||
timer_sync_wait_running(base);
|
||||
- raw_spin_lock_irq(&base->lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
From: Tian Tao <tiantao6@hisilicon.com>
|
||||
Date: Sat, 13 Feb 2021 20:58:30 +1300
|
||||
Subject: [PATCH 1/2] mm/zswap: add a flag to indicate if zpool can do sleep
|
||||
map
|
||||
|
||||
Patch series "Fix the compatibility of zsmalloc and zswap".
|
||||
|
||||
The compatibility of zsmalloc and zswap was broken by commit 1ec3b5fe6eec
|
||||
("mm/zswap: move to use crypto_acomp API for hardware acceleration").
|
||||
|
||||
Patch #1 adds a flag to zpool, then zswap used it to determine if zpool
|
||||
drivers such as zbud/z3fold/zsmalloc will enter an atomic context after
|
||||
mapping.
|
||||
|
||||
The difference between zbud/z3fold and zsmalloc is that zsmalloc requires
|
||||
an atomic context since its map function holds a preempt-disabled
|
||||
lock, but zbud/z3fold don't require an atomic context. So patch #2 sets
|
||||
flag sleep_mapped to true indicating that zbud/z3fold can sleep after
|
||||
mapping. zsmalloc didn't support sleep after mapping, so don't set that
|
||||
flag to true.
|
||||
|
||||
This patch (of 2):
|
||||
|
||||
Add a flag to zpool, named as "sleep_mapped", and have it set true for
|
||||
zbud/z3fold; do not set this flag for zsmalloc, so its default value is
|
||||
false. Then zswap could go the current path if the flag is true; and if
|
||||
it's false, copy data from src to a temporary buffer, then unmap the
|
||||
handle, take the mutex, process the buffer instead of src to avoid
|
||||
sleeping function called from atomic context.
|
||||
|
||||
[natechancellor@gmail.com: add return value in zswap_frontswap_load]
|
||||
Link: https://lkml.kernel.org/r/20210121214804.926843-1-natechancellor@gmail.com
|
||||
[tiantao6@hisilicon.com: fix potential memory leak]
|
||||
Link: https://lkml.kernel.org/r/1611538365-51811-1-git-send-email-tiantao6@hisilicon.com
|
||||
[colin.king@canonical.com: fix potential uninitialized pointer read on tmp]
|
||||
Link: https://lkml.kernel.org/r/20210128141728.639030-1-colin.king@canonical.com
|
||||
[tiantao6@hisilicon.com: fix variable 'entry' is uninitialized when used]
|
||||
Link: https://lkml.kernel.org/r/1611223030-58346-1-git-send-email-tiantao6@hisilicon.com
|
||||
Link: https://lkml.kernel.org/r/1611035683-12732-1-git-send-email-tiantao6@hisilicon.com
|
||||
Link: https://lkml.kernel.org/r/1611035683-12732-2-git-send-email-tiantao6@hisilicon.com
|
||||
[song.bao.hua@hisilicon.com: Rewrote changelog]
|
||||
Fixes: 1ec3b5fe6e ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
|
||||
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
|
||||
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
|
||||
Signed-off-by: Colin Ian King <colin.king@canonical.com>
|
||||
Reviewed-by: Vitaly Wool <vitaly.wool@konsulko.com>
|
||||
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reported-by: Mike Galbraith <efault@gmx.de>
|
||||
Cc: Dan Streetman <ddstreet@ieee.org>
|
||||
Cc: Seth Jennings <sjenning@redhat.com>
|
||||
Cc: Dan Carpenter <dan.carpenter@oracle.com>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/zpool.h | 3 ++
|
||||
mm/zpool.c | 13 ++++++++++++
|
||||
mm/zswap.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-----
|
||||
3 files changed, 62 insertions(+), 5 deletions(-)
|
||||
|
||||
--- a/include/linux/zpool.h
|
||||
+++ b/include/linux/zpool.h
|
||||
@@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *p
|
||||
* @malloc: allocate mem from a pool.
|
||||
* @free: free mem from a pool.
|
||||
* @shrink: shrink the pool.
|
||||
+ * @sleep_mapped: whether zpool driver can sleep during map.
|
||||
* @map: map a handle.
|
||||
* @unmap: unmap a handle.
|
||||
* @total_size: get total size of a pool.
|
||||
@@ -100,6 +101,7 @@ struct zpool_driver {
|
||||
int (*shrink)(void *pool, unsigned int pages,
|
||||
unsigned int *reclaimed);
|
||||
|
||||
+ bool sleep_mapped;
|
||||
void *(*map)(void *pool, unsigned long handle,
|
||||
enum zpool_mapmode mm);
|
||||
void (*unmap)(void *pool, unsigned long handle);
|
||||
@@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_
|
||||
int zpool_unregister_driver(struct zpool_driver *driver);
|
||||
|
||||
bool zpool_evictable(struct zpool *pool);
|
||||
+bool zpool_can_sleep_mapped(struct zpool *pool);
|
||||
|
||||
#endif
|
||||
--- a/mm/zpool.c
|
||||
+++ b/mm/zpool.c
|
||||
@@ -23,6 +23,7 @@ struct zpool {
|
||||
void *pool;
|
||||
const struct zpool_ops *ops;
|
||||
bool evictable;
|
||||
+ bool can_sleep_mapped;
|
||||
|
||||
struct list_head list;
|
||||
};
|
||||
@@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const ch
|
||||
zpool->pool = driver->create(name, gfp, ops, zpool);
|
||||
zpool->ops = ops;
|
||||
zpool->evictable = driver->shrink && ops && ops->evict;
|
||||
+ zpool->can_sleep_mapped = driver->sleep_mapped;
|
||||
|
||||
if (!zpool->pool) {
|
||||
pr_err("couldn't create %s pool\n", type);
|
||||
@@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool
|
||||
return zpool->evictable;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped.
|
||||
+ * @zpool: The zpool to test
|
||||
+ *
|
||||
+ * Returns: true if zpool can sleep; false otherwise.
|
||||
+ */
|
||||
+bool zpool_can_sleep_mapped(struct zpool *zpool)
|
||||
+{
|
||||
+ return zpool->can_sleep_mapped;
|
||||
+}
|
||||
+
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
|
||||
MODULE_DESCRIPTION("Common API for compressed memory storage");
|
||||
--- a/mm/zswap.c
|
||||
+++ b/mm/zswap.c
|
||||
@@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct
|
||||
struct scatterlist input, output;
|
||||
struct crypto_acomp_ctx *acomp_ctx;
|
||||
|
||||
- u8 *src;
|
||||
+ u8 *src, *tmp = NULL;
|
||||
unsigned int dlen;
|
||||
int ret;
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
};
|
||||
|
||||
+ if (!zpool_can_sleep_mapped(pool)) {
|
||||
+ tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
|
||||
+ if (!tmp)
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
+
|
||||
/* extract swpentry from data */
|
||||
zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
|
||||
swpentry = zhdr->swpentry; /* here */
|
||||
@@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct
|
||||
/* entry was invalidated */
|
||||
spin_unlock(&tree->lock);
|
||||
zpool_unmap_handle(pool, handle);
|
||||
+ kfree(tmp);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&tree->lock);
|
||||
@@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct
|
||||
dlen = PAGE_SIZE;
|
||||
src = (u8 *)zhdr + sizeof(struct zswap_header);
|
||||
|
||||
+ if (!zpool_can_sleep_mapped(pool)) {
|
||||
+
|
||||
+ memcpy(tmp, src, entry->length);
|
||||
+ src = tmp;
|
||||
+
|
||||
+ zpool_unmap_handle(pool, handle);
|
||||
+ }
|
||||
+
|
||||
mutex_lock(acomp_ctx->mutex);
|
||||
sg_init_one(&input, src, entry->length);
|
||||
sg_init_table(&output, 1);
|
||||
@@ -1033,7 +1048,11 @@ static int zswap_writeback_entry(struct
|
||||
spin_unlock(&tree->lock);
|
||||
|
||||
end:
|
||||
- zpool_unmap_handle(pool, handle);
|
||||
+ if (zpool_can_sleep_mapped(pool))
|
||||
+ zpool_unmap_handle(pool, handle);
|
||||
+ else
|
||||
+ kfree(tmp);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned
|
||||
struct zswap_entry *entry;
|
||||
struct scatterlist input, output;
|
||||
struct crypto_acomp_ctx *acomp_ctx;
|
||||
- u8 *src, *dst;
|
||||
+ u8 *src, *dst, *tmp;
|
||||
unsigned int dlen;
|
||||
int ret;
|
||||
|
||||
@@ -1253,15 +1272,33 @@ static int zswap_frontswap_load(unsigned
|
||||
dst = kmap_atomic(page);
|
||||
zswap_fill_page(dst, entry->value);
|
||||
kunmap_atomic(dst);
|
||||
+ ret = 0;
|
||||
goto freeentry;
|
||||
}
|
||||
|
||||
+ if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
|
||||
+
|
||||
+ tmp = kmalloc(entry->length, GFP_ATOMIC);
|
||||
+ if (!tmp) {
|
||||
+ ret = -ENOMEM;
|
||||
+ goto freeentry;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* decompress */
|
||||
dlen = PAGE_SIZE;
|
||||
src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
|
||||
if (zpool_evictable(entry->pool->zpool))
|
||||
src += sizeof(struct zswap_header);
|
||||
|
||||
+ if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
|
||||
+
|
||||
+ memcpy(tmp, src, entry->length);
|
||||
+ src = tmp;
|
||||
+
|
||||
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
|
||||
+ }
|
||||
+
|
||||
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
|
||||
mutex_lock(acomp_ctx->mutex);
|
||||
sg_init_one(&input, src, entry->length);
|
||||
@@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned
|
||||
ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
|
||||
mutex_unlock(acomp_ctx->mutex);
|
||||
|
||||
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
|
||||
+ if (zpool_can_sleep_mapped(entry->pool->zpool))
|
||||
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
|
||||
+ else
|
||||
+ kfree(tmp);
|
||||
+
|
||||
BUG_ON(ret);
|
||||
|
||||
freeentry:
|
||||
@@ -1279,7 +1320,7 @@ static int zswap_frontswap_load(unsigned
|
||||
zswap_entry_put(tree, entry);
|
||||
spin_unlock(&tree->lock);
|
||||
|
||||
- return 0;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
/* frees an entry in zswap */
|
||||
@@ -0,0 +1,45 @@
|
||||
From: Tian Tao <tiantao6@hisilicon.com>
|
||||
Date: Sat, 13 Feb 2021 20:58:31 +1300
|
||||
Subject: [PATCH 2/2] mm: set the sleep_mapped to true for zbud and z3fold
|
||||
|
||||
zpool driver adds a flag to indicate whether the zpool driver can enter
|
||||
an atomic context after mapping. This patch sets it true for z3fold and
|
||||
zbud.
|
||||
|
||||
Link: https://lkml.kernel.org/r/1611035683-12732-3-git-send-email-tiantao6@hisilicon.com
|
||||
[song.bao.hua@hisilicon.com: Rewrote changelog]
|
||||
Fixes: 1ec3b5fe6e ("mm/zswap: move to use crypto_acomp API for hardware acceleration")
|
||||
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
|
||||
Reviewed-by: Vitaly Wool <vitaly.wool@konsulko.com>
|
||||
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reported-by: Mike Galbraith <efault@gmx.de>
|
||||
Cc: Seth Jennings <sjenning@redhat.com>
|
||||
Cc: Dan Streetman <ddstreet@ieee.org>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/z3fold.c | 1 +
|
||||
mm/zbud.c | 1 +
|
||||
2 files changed, 2 insertions(+)
|
||||
|
||||
--- a/mm/z3fold.c
|
||||
+++ b/mm/z3fold.c
|
||||
@@ -1778,6 +1778,7 @@ static u64 z3fold_zpool_total_size(void
|
||||
|
||||
static struct zpool_driver z3fold_zpool_driver = {
|
||||
.type = "z3fold",
|
||||
+ .sleep_mapped = true,
|
||||
.owner = THIS_MODULE,
|
||||
.create = z3fold_zpool_create,
|
||||
.destroy = z3fold_zpool_destroy,
|
||||
--- a/mm/zbud.c
|
||||
+++ b/mm/zbud.c
|
||||
@@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *p
|
||||
|
||||
static struct zpool_driver zbud_zpool_driver = {
|
||||
.type = "zbud",
|
||||
+ .sleep_mapped = true,
|
||||
.owner = THIS_MODULE,
|
||||
.create = zbud_zpool_create,
|
||||
.destroy = zbud_zpool_destroy,
|
||||
@@ -0,0 +1,42 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Sat, 23 Jan 2021 21:10:26 +0100
|
||||
Subject: [PATCH] blk-mq: Always complete remote completion requests in
|
||||
softirq
|
||||
|
||||
Controllers with multiple queues have their IRQ handlers pinned to a
|
||||
CPU. The core shouldn't need to complete the request on a remote CPU.
|
||||
|
||||
Remove this case and always raise the softirq to complete the request.
|
||||
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Reviewed-by: Daniel Wagner <dwagner@suse.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
block/blk-mq.c | 14 +-------------
|
||||
1 file changed, 1 insertion(+), 13 deletions(-)
|
||||
|
||||
--- a/block/blk-mq.c
|
||||
+++ b/block/blk-mq.c
|
||||
@@ -628,19 +628,7 @@ static void __blk_mq_complete_request_re
|
||||
{
|
||||
struct request *rq = data;
|
||||
|
||||
- /*
|
||||
- * For most of single queue controllers, there is only one irq vector
|
||||
- * for handling I/O completion, and the only irq's affinity is set
|
||||
- * to all possible CPUs. On most of ARCHs, this affinity means the irq
|
||||
- * is handled on one specific CPU.
|
||||
- *
|
||||
- * So complete I/O requests in softirq context in case of single queue
|
||||
- * devices to avoid degrading I/O performance due to irqsoff latency.
|
||||
- */
|
||||
- if (rq->q->nr_hw_queues == 1)
|
||||
- blk_mq_trigger_softirq(rq);
|
||||
- else
|
||||
- rq->q->mq_ops->complete(rq);
|
||||
+ blk_mq_trigger_softirq(rq);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
@@ -0,0 +1,188 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Sat, 23 Jan 2021 21:10:27 +0100
|
||||
Subject: [PATCH] blk-mq: Use llist_head for blk_cpu_done
|
||||
|
||||
With llist_head it is possible to avoid the locking (the irq-off region)
|
||||
when items are added. This makes it possible to add items on a remote
|
||||
CPU without additional locking.
|
||||
llist_add() returns true if the list was previously empty. This can be
|
||||
used to invoke the SMP function call / raise softirq only if the first
|
||||
item was added (otherwise it is already pending).
|
||||
This simplifies the code a little and reduces the IRQ-off regions.
|
||||
|
||||
blk_mq_raise_softirq() needs a preempt-disable section to ensure the
|
||||
request is enqueued on the same CPU as the softirq is raised.
|
||||
Some callers (USB-storage) invoke this path in preemptible context.
|
||||
|
||||
Reviewed-by: Christoph Hellwig <hch@lst.de>
|
||||
Reviewed-by: Daniel Wagner <dwagner@suse.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Jens Axboe <axboe@kernel.dk>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
block/blk-mq.c | 97 ++++++++++++++++++++-----------------------------
|
||||
include/linux/blkdev.h | 2 -
|
||||
2 files changed, 42 insertions(+), 57 deletions(-)
|
||||
|
||||
--- a/block/blk-mq.c
|
||||
+++ b/block/blk-mq.c
|
||||
@@ -41,7 +41,7 @@
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
|
||||
+static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
|
||||
|
||||
static void blk_mq_poll_stats_start(struct request_queue *q);
|
||||
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
|
||||
@@ -567,68 +567,29 @@ void blk_mq_end_request(struct request *
|
||||
}
|
||||
EXPORT_SYMBOL(blk_mq_end_request);
|
||||
|
||||
-/*
|
||||
- * Softirq action handler - move entries to local list and loop over them
|
||||
- * while passing them to the queue registered handler.
|
||||
- */
|
||||
-static __latent_entropy void blk_done_softirq(struct softirq_action *h)
|
||||
+static void blk_complete_reqs(struct llist_head *list)
|
||||
{
|
||||
- struct list_head *cpu_list, local_list;
|
||||
-
|
||||
- local_irq_disable();
|
||||
- cpu_list = this_cpu_ptr(&blk_cpu_done);
|
||||
- list_replace_init(cpu_list, &local_list);
|
||||
- local_irq_enable();
|
||||
-
|
||||
- while (!list_empty(&local_list)) {
|
||||
- struct request *rq;
|
||||
+ struct llist_node *entry = llist_reverse_order(llist_del_all(list));
|
||||
+ struct request *rq, *next;
|
||||
|
||||
- rq = list_entry(local_list.next, struct request, ipi_list);
|
||||
- list_del_init(&rq->ipi_list);
|
||||
+ llist_for_each_entry_safe(rq, next, entry, ipi_list)
|
||||
rq->q->mq_ops->complete(rq);
|
||||
- }
|
||||
}
|
||||
|
||||
-static void blk_mq_trigger_softirq(struct request *rq)
|
||||
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
|
||||
{
|
||||
- struct list_head *list;
|
||||
- unsigned long flags;
|
||||
-
|
||||
- local_irq_save(flags);
|
||||
- list = this_cpu_ptr(&blk_cpu_done);
|
||||
- list_add_tail(&rq->ipi_list, list);
|
||||
-
|
||||
- /*
|
||||
- * If the list only contains our just added request, signal a raise of
|
||||
- * the softirq. If there are already entries there, someone already
|
||||
- * raised the irq but it hasn't run yet.
|
||||
- */
|
||||
- if (list->next == &rq->ipi_list)
|
||||
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
- local_irq_restore(flags);
|
||||
+ blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
|
||||
}
|
||||
|
||||
static int blk_softirq_cpu_dead(unsigned int cpu)
|
||||
{
|
||||
- /*
|
||||
- * If a CPU goes away, splice its entries to the current CPU
|
||||
- * and trigger a run of the softirq
|
||||
- */
|
||||
- local_irq_disable();
|
||||
- list_splice_init(&per_cpu(blk_cpu_done, cpu),
|
||||
- this_cpu_ptr(&blk_cpu_done));
|
||||
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
- local_irq_enable();
|
||||
-
|
||||
+ blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
|
||||
return 0;
|
||||
}
|
||||
|
||||
-
|
||||
static void __blk_mq_complete_request_remote(void *data)
|
||||
{
|
||||
- struct request *rq = data;
|
||||
-
|
||||
- blk_mq_trigger_softirq(rq);
|
||||
+ __raise_softirq_irqoff(BLOCK_SOFTIRQ);
|
||||
}
|
||||
|
||||
static inline bool blk_mq_complete_need_ipi(struct request *rq)
|
||||
@@ -657,6 +618,30 @@ static inline bool blk_mq_complete_need_
|
||||
return cpu_online(rq->mq_ctx->cpu);
|
||||
}
|
||||
|
||||
+static void blk_mq_complete_send_ipi(struct request *rq)
|
||||
+{
|
||||
+ struct llist_head *list;
|
||||
+ unsigned int cpu;
|
||||
+
|
||||
+ cpu = rq->mq_ctx->cpu;
|
||||
+ list = &per_cpu(blk_cpu_done, cpu);
|
||||
+ if (llist_add(&rq->ipi_list, list)) {
|
||||
+ INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
|
||||
+ smp_call_function_single_async(cpu, &rq->csd);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void blk_mq_raise_softirq(struct request *rq)
|
||||
+{
|
||||
+ struct llist_head *list;
|
||||
+
|
||||
+ preempt_disable();
|
||||
+ list = this_cpu_ptr(&blk_cpu_done);
|
||||
+ if (llist_add(&rq->ipi_list, list))
|
||||
+ raise_softirq(BLOCK_SOFTIRQ);
|
||||
+ preempt_enable();
|
||||
+}
|
||||
+
|
||||
bool blk_mq_complete_request_remote(struct request *rq)
|
||||
{
|
||||
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
|
||||
@@ -669,15 +654,15 @@ bool blk_mq_complete_request_remote(stru
|
||||
return false;
|
||||
|
||||
if (blk_mq_complete_need_ipi(rq)) {
|
||||
- INIT_CSD(&rq->csd, __blk_mq_complete_request_remote, rq);
|
||||
- smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
|
||||
- } else {
|
||||
- if (rq->q->nr_hw_queues > 1)
|
||||
- return false;
|
||||
- blk_mq_trigger_softirq(rq);
|
||||
+ blk_mq_complete_send_ipi(rq);
|
||||
+ return true;
|
||||
}
|
||||
|
||||
- return true;
|
||||
+ if (rq->q->nr_hw_queues == 1) {
|
||||
+ blk_mq_raise_softirq(rq);
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
|
||||
|
||||
@@ -3892,7 +3877,7 @@ static int __init blk_mq_init(void)
|
||||
int i;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
- INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
|
||||
+ init_llist_head(&per_cpu(blk_cpu_done, i));
|
||||
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
|
||||
|
||||
cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
|
||||
--- a/include/linux/blkdev.h
|
||||
+++ b/include/linux/blkdev.h
|
||||
@@ -153,7 +153,7 @@ struct request {
|
||||
*/
|
||||
union {
|
||||
struct hlist_node hash; /* merge hash */
|
||||
- struct list_head ipi_list;
|
||||
+ struct llist_node ipi_list;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -0,0 +1,79 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 9 Nov 2020 21:30:41 +0100
|
||||
Subject: [PATCH 1/2] kthread: Move prio/affinity change into the newly created
|
||||
thread
|
||||
|
||||
With enabled threaded interrupts the nouveau driver reported the
|
||||
following:
|
||||
| Chain exists of:
|
||||
| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem
|
||||
|
|
||||
| Possible unsafe locking scenario:
|
||||
|
|
||||
| CPU0 CPU1
|
||||
| ---- ----
|
||||
| lock(&cpuset_rwsem);
|
||||
| lock(&device->mutex);
|
||||
| lock(&cpuset_rwsem);
|
||||
| lock(&mm->mmap_lock#2);
|
||||
|
||||
The device->mutex is nvkm_device::mutex.
|
||||
|
||||
Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing
|
||||
to do.
|
||||
Move the priority reset to the start of the newly created thread.
|
||||
|
||||
Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()")
|
||||
Reported-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de
|
||||
---
|
||||
kernel/kthread.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/kthread.c
|
||||
+++ b/kernel/kthread.c
|
||||
@@ -243,6 +243,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme);
|
||||
|
||||
static int kthread(void *_create)
|
||||
{
|
||||
+ static const struct sched_param param = { .sched_priority = 0 };
|
||||
/* Copy data: it's on kthread's stack */
|
||||
struct kthread_create_info *create = _create;
|
||||
int (*threadfn)(void *data) = create->threadfn;
|
||||
@@ -273,6 +274,13 @@ static int kthread(void *_create)
|
||||
init_completion(&self->parked);
|
||||
current->vfork_done = &self->exited;
|
||||
|
||||
+ /*
|
||||
+ * The new thread inherited kthreadd's priority and CPU mask. Reset
|
||||
+ * back to default in case they have been changed.
|
||||
+ */
|
||||
+ sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m);
|
||||
+ set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD));
|
||||
+
|
||||
/* OK, tell user we're spawned, wait for stop or wakeup */
|
||||
__set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
create->result = current;
|
||||
@@ -370,7 +378,6 @@ struct task_struct *__kthread_create_on_
|
||||
}
|
||||
task = create->result;
|
||||
if (!IS_ERR(task)) {
|
||||
- static const struct sched_param param = { .sched_priority = 0 };
|
||||
char name[TASK_COMM_LEN];
|
||||
|
||||
/*
|
||||
@@ -379,13 +386,6 @@ struct task_struct *__kthread_create_on_
|
||||
*/
|
||||
vsnprintf(name, sizeof(name), namefmt, args);
|
||||
set_task_comm(task, name);
|
||||
- /*
|
||||
- * root may have changed our (kthreadd's) priority or CPU mask.
|
||||
- * The kernel thread should not inherit these properties.
|
||||
- */
|
||||
- sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m);
|
||||
- set_cpus_allowed_ptr(task,
|
||||
- housekeeping_cpumask(HK_FLAG_KTHREAD));
|
||||
}
|
||||
kfree(create);
|
||||
return task;
|
||||
@@ -0,0 +1,55 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Mon, 9 Nov 2020 23:32:39 +0100
|
||||
Subject: [PATCH 2/2] genirq: Move prio assignment into the newly created
|
||||
thread
|
||||
|
||||
With enabled threaded interrupts the nouveau driver reported the
|
||||
following:
|
||||
| Chain exists of:
|
||||
| &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem
|
||||
|
|
||||
| Possible unsafe locking scenario:
|
||||
|
|
||||
| CPU0 CPU1
|
||||
| ---- ----
|
||||
| lock(&cpuset_rwsem);
|
||||
| lock(&device->mutex);
|
||||
| lock(&cpuset_rwsem);
|
||||
| lock(&mm->mmap_lock#2);
|
||||
|
||||
The device->mutex is nvkm_device::mutex.
|
||||
|
||||
Unblocking the lockchain at `cpuset_rwsem' is probably the easiest thing
|
||||
to do.
|
||||
Move the priority assignment to the start of the newly created thread.
|
||||
|
||||
Fixes: 710da3c8ea7df ("sched/core: Prevent race condition between cpuset and __sched_setscheduler()")
|
||||
Reported-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
[bigeasy: Patch description]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Link: https://lkml.kernel.org/r/a23a826af7c108ea5651e73b8fbae5e653f16e86.camel@gmx.de
|
||||
---
|
||||
kernel/irq/manage.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/irq/manage.c
|
||||
+++ b/kernel/irq/manage.c
|
||||
@@ -1225,6 +1225,8 @@ static int irq_thread(void *data)
|
||||
irqreturn_t (*handler_fn)(struct irq_desc *desc,
|
||||
struct irqaction *action);
|
||||
|
||||
+ sched_set_fifo(current);
|
||||
+
|
||||
if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD,
|
||||
&action->thread_flags))
|
||||
handler_fn = irq_forced_thread_fn;
|
||||
@@ -1390,8 +1392,6 @@ setup_irq_thread(struct irqaction *new,
|
||||
if (IS_ERR(t))
|
||||
return PTR_ERR(t);
|
||||
|
||||
- sched_set_fifo(t);
|
||||
-
|
||||
/*
|
||||
* We keep the reference to the task struct even if
|
||||
* the thread dies to avoid that the interrupt code
|
||||
@@ -0,0 +1,123 @@
|
||||
From: Valentin Schneider <valentin.schneider@arm.com>
|
||||
Date: Sun, 22 Nov 2020 20:19:04 +0000
|
||||
Subject: [PATCH] notifier: Make atomic_notifiers use raw_spinlock
|
||||
|
||||
Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno
|
||||
leads to the idle task blocking on an RT sleeping spinlock down some
|
||||
notifier path:
|
||||
|
||||
[ 1.809101] BUG: scheduling while atomic: swapper/5/0/0x00000002
|
||||
[ 1.809116] Modules linked in:
|
||||
[ 1.809123] Preemption disabled at:
|
||||
[ 1.809125] secondary_start_kernel (arch/arm64/kernel/smp.c:227)
|
||||
[ 1.809146] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.10.0-rc3-rt7 #168
|
||||
[ 1.809153] Hardware name: ARM Juno development board (r0) (DT)
|
||||
[ 1.809158] Call trace:
|
||||
[ 1.809160] dump_backtrace (arch/arm64/kernel/stacktrace.c:100 (discriminator 1))
|
||||
[ 1.809170] show_stack (arch/arm64/kernel/stacktrace.c:198)
|
||||
[ 1.809178] dump_stack (lib/dump_stack.c:122)
|
||||
[ 1.809188] __schedule_bug (kernel/sched/core.c:4886)
|
||||
[ 1.809197] __schedule (./arch/arm64/include/asm/preempt.h:18 kernel/sched/core.c:4913 kernel/sched/core.c:5040)
|
||||
[ 1.809204] preempt_schedule_lock (kernel/sched/core.c:5365 (discriminator 1))
|
||||
[ 1.809210] rt_spin_lock_slowlock_locked (kernel/locking/rtmutex.c:1072)
|
||||
[ 1.809217] rt_spin_lock_slowlock (kernel/locking/rtmutex.c:1110)
|
||||
[ 1.809224] rt_spin_lock (./include/linux/rcupdate.h:647 kernel/locking/rtmutex.c:1139)
|
||||
[ 1.809231] atomic_notifier_call_chain_robust (kernel/notifier.c:71 kernel/notifier.c:118 kernel/notifier.c:186)
|
||||
[ 1.809240] cpu_pm_enter (kernel/cpu_pm.c:39 kernel/cpu_pm.c:93)
|
||||
[ 1.809249] psci_enter_idle_state (drivers/cpuidle/cpuidle-psci.c:52 drivers/cpuidle/cpuidle-psci.c:129)
|
||||
[ 1.809258] cpuidle_enter_state (drivers/cpuidle/cpuidle.c:238)
|
||||
[ 1.809267] cpuidle_enter (drivers/cpuidle/cpuidle.c:353)
|
||||
[ 1.809275] do_idle (kernel/sched/idle.c:132 kernel/sched/idle.c:213 kernel/sched/idle.c:273)
|
||||
[ 1.809282] cpu_startup_entry (kernel/sched/idle.c:368 (discriminator 1))
|
||||
[ 1.809288] secondary_start_kernel (arch/arm64/kernel/smp.c:273)
|
||||
|
||||
Two points worth noting:
|
||||
|
||||
1) That this is conceptually the same issue as pointed out in:
|
||||
313c8c16ee62 ("PM / CPU: replace raw_notifier with atomic_notifier")
|
||||
2) Only the _robust() variant of atomic_notifier callchains suffers from
|
||||
this
|
||||
|
||||
AFAICT only the cpu_pm_notifier_chain really needs to be changed, but
|
||||
singling it out would mean introducing a new (truly) non-blocking API. At
|
||||
the same time, callers that are fine with any blocking within the call
|
||||
chain should use blocking notifiers, so patching up all atomic_notifier's
|
||||
doesn't seem *too* crazy to me.
|
||||
|
||||
Fixes: 70d932985757 ("notifier: Fix broken error handling pattern")
|
||||
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
|
||||
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
|
||||
Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/notifier.h | 6 +++---
|
||||
kernel/notifier.c | 12 ++++++------
|
||||
2 files changed, 9 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/include/linux/notifier.h
|
||||
+++ b/include/linux/notifier.h
|
||||
@@ -58,7 +58,7 @@ struct notifier_block {
|
||||
};
|
||||
|
||||
struct atomic_notifier_head {
|
||||
- spinlock_t lock;
|
||||
+ raw_spinlock_t lock;
|
||||
struct notifier_block __rcu *head;
|
||||
};
|
||||
|
||||
@@ -78,7 +78,7 @@ struct srcu_notifier_head {
|
||||
};
|
||||
|
||||
#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
|
||||
- spin_lock_init(&(name)->lock); \
|
||||
+ raw_spin_lock_init(&(name)->lock); \
|
||||
(name)->head = NULL; \
|
||||
} while (0)
|
||||
#define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \
|
||||
@@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(stru
|
||||
cleanup_srcu_struct(&(name)->srcu);
|
||||
|
||||
#define ATOMIC_NOTIFIER_INIT(name) { \
|
||||
- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
|
||||
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
|
||||
.head = NULL }
|
||||
#define BLOCKING_NOTIFIER_INIT(name) { \
|
||||
.rwsem = __RWSEM_INITIALIZER((name).rwsem), \
|
||||
--- a/kernel/notifier.c
|
||||
+++ b/kernel/notifier.c
|
||||
@@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struc
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
- spin_lock_irqsave(&nh->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&nh->lock, flags);
|
||||
ret = notifier_chain_register(&nh->head, n);
|
||||
- spin_unlock_irqrestore(&nh->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&nh->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);
|
||||
@@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(str
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
- spin_lock_irqsave(&nh->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&nh->lock, flags);
|
||||
ret = notifier_chain_unregister(&nh->head, n);
|
||||
- spin_unlock_irqrestore(&nh->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&nh->lock, flags);
|
||||
synchronize_rcu();
|
||||
return ret;
|
||||
}
|
||||
@@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(st
|
||||
* Musn't use RCU; because then the notifier list can
|
||||
* change between the up and down traversal.
|
||||
*/
|
||||
- spin_lock_irqsave(&nh->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&nh->lock, flags);
|
||||
ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v);
|
||||
- spin_unlock_irqrestore(&nh->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&nh->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 15 Dec 2020 15:16:45 +0100
|
||||
Subject: [PATCH 1/5] rcu: Make RCU_BOOST default on CONFIG_PREEMPT_RT
|
||||
|
||||
On PREEMPT_RT kernels, RCU callbacks are deferred to the `rcuc' kthread.
|
||||
This can stall RCU grace periods due to lengthy preemption not only of RCU
|
||||
readers but also of 'rcuc' kthreads, either of which prevent grace periods
|
||||
from completing, which can in turn result in OOM. Because PREEMPT_RT
|
||||
kernels have more kthreads that can block grace periods, it is more
|
||||
important for such kernels to enable RCU_BOOST.
|
||||
|
||||
This commit therefore makes RCU_BOOST the default on PREEMPT_RT.
|
||||
RCU_BOOST can still be manually disabled if need be.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/rcu/Kconfig | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/rcu/Kconfig
|
||||
+++ b/kernel/rcu/Kconfig
|
||||
@@ -188,8 +188,8 @@ config RCU_FAST_NO_HZ
|
||||
|
||||
config RCU_BOOST
|
||||
bool "Enable RCU priority boosting"
|
||||
- depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
|
||||
- default n
|
||||
+ depends on (RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT) || PREEMPT_RT
|
||||
+ default y if PREEMPT_RT
|
||||
help
|
||||
This option boosts the priority of preempted RCU readers that
|
||||
block the current preemptible RCU grace period for too long.
|
||||
@@ -0,0 +1,57 @@
|
||||
From: Scott Wood <swood@redhat.com>
|
||||
Date: Tue, 15 Dec 2020 15:16:46 +0100
|
||||
Subject: [PATCH 2/5] rcu: Unconditionally use rcuc threads on PREEMPT_RT
|
||||
|
||||
PREEMPT_RT systems have long used the rcutree.use_softirq kernel
|
||||
boot parameter to avoid use of RCU_SOFTIRQ handlers, which can disrupt
|
||||
real-time applications by invoking callbacks during return from interrupts
|
||||
that arrived while executing time-critical code. This kernel boot
|
||||
parameter instead runs RCU core processing in an 'rcuc' kthread, thus
|
||||
allowing the scheduler to do its job of avoiding disrupting time-critical
|
||||
code.
|
||||
|
||||
This commit therefore disables the rcutree.use_softirq kernel boot
|
||||
parameter on PREEMPT_RT systems, thus forcing such systems to do RCU
|
||||
core processing in 'rcuc' kthreads. This approach has long been in
|
||||
use by users of the -rt patchset, and there have been no complaints.
|
||||
There is therefore no way for the system administrator to override this
|
||||
choice, at least without modifying and rebuilding the kernel.
|
||||
|
||||
Signed-off-by: Scott Wood <swood@redhat.com>
|
||||
[bigeasy: Reword commit message]
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
[ paulmck: Update kernel-parameters.txt accordingly. ]
|
||||
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/admin-guide/kernel-parameters.txt | 4 ++++
|
||||
kernel/rcu/tree.c | 4 +++-
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -4092,6 +4092,10 @@
|
||||
value, meaning that RCU_SOFTIRQ is used by default.
|
||||
Specify rcutree.use_softirq=0 to use rcuc kthreads.
|
||||
|
||||
+ But note that CONFIG_PREEMPT_RT=y kernels disable
|
||||
+ this kernel boot parameter, forcibly setting it
|
||||
+ to zero.
|
||||
+
|
||||
rcutree.rcu_fanout_exact= [KNL]
|
||||
Disable autobalancing of the rcu_node combining
|
||||
tree. This is used by rcutorture, and might
|
||||
--- a/kernel/rcu/tree.c
|
||||
+++ b/kernel/rcu/tree.c
|
||||
@@ -100,8 +100,10 @@ static struct rcu_state rcu_state = {
|
||||
static bool dump_tree;
|
||||
module_param(dump_tree, bool, 0444);
|
||||
/* By default, use RCU_SOFTIRQ instead of rcuc kthreads. */
|
||||
-static bool use_softirq = true;
|
||||
+static bool use_softirq = !IS_ENABLED(CONFIG_PREEMPT_RT);
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
module_param(use_softirq, bool, 0444);
|
||||
+#endif
|
||||
/* Control rcu_node-tree auto-balancing at boot time. */
|
||||
static bool rcu_fanout_exact;
|
||||
module_param(rcu_fanout_exact, bool, 0444);
|
||||
@@ -0,0 +1,62 @@
|
||||
From: Julia Cartwright <julia@ni.com>
|
||||
Date: Tue, 15 Dec 2020 15:16:47 +0100
|
||||
Subject: [PATCH 3/5] rcu: Enable rcu_normal_after_boot unconditionally for RT
|
||||
|
||||
Expedited RCU grace periods send IPIs to all non-idle CPUs, and thus can
|
||||
disrupt time-critical code in real-time applications. However, there
|
||||
is a portion of boot-time processing (presumably before any real-time
|
||||
applications have started) where expedited RCU grace periods are the only
|
||||
option. And so it is that experience with the -rt patchset indicates that
|
||||
PREEMPT_RT systems should always set the rcupdate.rcu_normal_after_boot
|
||||
kernel boot parameter.
|
||||
|
||||
This commit therefore makes the post-boot application environment safe
|
||||
for real-time applications by making PREEMPT_RT systems disable the
|
||||
rcupdate.rcu_normal_after_boot kernel boot parameter and acting as
|
||||
if this parameter had been set. This means that post-boot calls to
|
||||
synchronize_rcu_expedited() will be treated as if they were instead
|
||||
calls to synchronize_rcu(), thus preventing the IPIs, and thus avoiding
|
||||
disrupting real-time applications.
|
||||
|
||||
Suggested-by: Luiz Capitulino <lcapitulino@redhat.com>
|
||||
Acked-by: Paul E. McKenney <paulmck@linux.ibm.com>
|
||||
Signed-off-by: Julia Cartwright <julia@ni.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
[ paulmck: Update kernel-parameters.txt accordingly. ]
|
||||
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/admin-guide/kernel-parameters.txt | 7 +++++++
|
||||
kernel/rcu/update.c | 4 +++-
|
||||
2 files changed, 10 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/Documentation/admin-guide/kernel-parameters.txt
|
||||
+++ b/Documentation/admin-guide/kernel-parameters.txt
|
||||
@@ -4474,6 +4474,13 @@
|
||||
only normal grace-period primitives. No effect
|
||||
on CONFIG_TINY_RCU kernels.
|
||||
|
||||
+ But note that CONFIG_PREEMPT_RT=y kernels enable
|
||||
+ this kernel boot parameter, forcibly setting
|
||||
+ it to the value one, that is, converting any
|
||||
+ post-boot attempt at an expedited RCU grace
|
||||
+ period to instead use normal non-expedited
|
||||
+ grace-period processing.
|
||||
+
|
||||
rcupdate.rcu_task_ipi_delay= [KNL]
|
||||
Set time in jiffies during which RCU tasks will
|
||||
avoid sending IPIs, starting with the beginning
|
||||
--- a/kernel/rcu/update.c
|
||||
+++ b/kernel/rcu/update.c
|
||||
@@ -56,8 +56,10 @@
|
||||
#ifndef CONFIG_TINY_RCU
|
||||
module_param(rcu_expedited, int, 0);
|
||||
module_param(rcu_normal, int, 0);
|
||||
-static int rcu_normal_after_boot;
|
||||
+static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT);
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
module_param(rcu_normal_after_boot, int, 0);
|
||||
+#endif
|
||||
#endif /* #ifndef CONFIG_TINY_RCU */
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 15 Dec 2020 15:16:48 +0100
|
||||
Subject: [PATCH 4/5] doc: Update RCU's requirements page about the PREEMPT_RT
|
||||
wiki.
|
||||
|
||||
The PREEMPT_RT wiki moved from kernel.org to the Linux Foundation wiki.
|
||||
The kernel.org wiki is read only.
|
||||
|
||||
This commit therefore updates the URL of the active PREEMPT_RT wiki.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/RCU/Design/Requirements/Requirements.rst | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
@@ -2319,7 +2319,7 @@ decides to throw at it.
|
||||
|
||||
The Linux kernel is used for real-time workloads, especially in
|
||||
conjunction with the `-rt
|
||||
-patchset <https://rt.wiki.kernel.org/index.php/Main_Page>`__. The
|
||||
+patchset <https://wiki.linuxfoundation.org/realtime/>`__. The
|
||||
real-time-latency response requirements are such that the traditional
|
||||
approach of disabling preemption across RCU read-side critical sections
|
||||
is inappropriate. Kernels built with ``CONFIG_PREEMPT=y`` therefore use
|
||||
@@ -0,0 +1,233 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 15 Dec 2020 15:16:49 +0100
|
||||
Subject: [PATCH 5/5] doc: Use CONFIG_PREEMPTION
|
||||
|
||||
CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by CONFIG_PREEMPT_RT.
|
||||
Both PREEMPT and PREEMPT_RT require the same functionality which today
|
||||
depends on CONFIG_PREEMPT.
|
||||
|
||||
Update the documents and mention CONFIG_PREEMPTION. Spell out
|
||||
CONFIG_PREEMPT_RT (instead of PREEMPT_RT) since it is an option now.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst | 4 -
|
||||
Documentation/RCU/Design/Requirements/Requirements.rst | 24 +++++-----
|
||||
Documentation/RCU/checklist.rst | 2
|
||||
Documentation/RCU/rcubarrier.rst | 6 +-
|
||||
Documentation/RCU/stallwarn.rst | 4 -
|
||||
Documentation/RCU/whatisRCU.rst | 10 ++--
|
||||
6 files changed, 25 insertions(+), 25 deletions(-)
|
||||
|
||||
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
|
||||
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
|
||||
@@ -38,7 +38,7 @@ sections.
|
||||
RCU-preempt Expedited Grace Periods
|
||||
===================================
|
||||
|
||||
-``CONFIG_PREEMPT=y`` kernels implement RCU-preempt.
|
||||
+``CONFIG_PREEMPTION=y`` kernels implement RCU-preempt.
|
||||
The overall flow of the handling of a given CPU by an RCU-preempt
|
||||
expedited grace period is shown in the following diagram:
|
||||
|
||||
@@ -112,7 +112,7 @@ things.
|
||||
RCU-sched Expedited Grace Periods
|
||||
---------------------------------
|
||||
|
||||
-``CONFIG_PREEMPT=n`` kernels implement RCU-sched. The overall flow of
|
||||
+``CONFIG_PREEMPTION=n`` kernels implement RCU-sched. The overall flow of
|
||||
the handling of a given CPU by an RCU-sched expedited grace period is
|
||||
shown in the following diagram:
|
||||
|
||||
--- a/Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
+++ b/Documentation/RCU/Design/Requirements/Requirements.rst
|
||||
@@ -78,7 +78,7 @@ RCU treats a nested set as one big RCU r
|
||||
Production-quality implementations of ``rcu_read_lock()`` and
|
||||
``rcu_read_unlock()`` are extremely lightweight, and in fact have
|
||||
exactly zero overhead in Linux kernels built for production use with
|
||||
-``CONFIG_PREEMPT=n``.
|
||||
+``CONFIG_PREEMPTION=n``.
|
||||
|
||||
This guarantee allows ordering to be enforced with extremely low
|
||||
overhead to readers, for example:
|
||||
@@ -1182,7 +1182,7 @@ and has become decreasingly so as memory
|
||||
costs have plummeted. However, as I learned from Matt Mackall's
|
||||
`bloatwatch <http://elinux.org/Linux_Tiny-FAQ>`__ efforts, memory
|
||||
footprint is critically important on single-CPU systems with
|
||||
-non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny
|
||||
+non-preemptible (``CONFIG_PREEMPTION=n``) kernels, and thus `tiny
|
||||
RCU <https://lkml.kernel.org/g/20090113221724.GA15307@linux.vnet.ibm.com>`__
|
||||
was born. Josh Triplett has since taken over the small-memory banner
|
||||
with his `Linux kernel tinification <https://tiny.wiki.kernel.org/>`__
|
||||
@@ -1498,7 +1498,7 @@ limitations.
|
||||
|
||||
Implementations of RCU for which ``rcu_read_lock()`` and
|
||||
``rcu_read_unlock()`` generate no code, such as Linux-kernel RCU when
|
||||
-``CONFIG_PREEMPT=n``, can be nested arbitrarily deeply. After all, there
|
||||
+``CONFIG_PREEMPTION=n``, can be nested arbitrarily deeply. After all, there
|
||||
is no overhead. Except that if all these instances of
|
||||
``rcu_read_lock()`` and ``rcu_read_unlock()`` are visible to the
|
||||
compiler, compilation will eventually fail due to exhausting memory,
|
||||
@@ -1771,7 +1771,7 @@ implementation can be a no-op.
|
||||
|
||||
However, once the scheduler has spawned its first kthread, this early
|
||||
boot trick fails for ``synchronize_rcu()`` (as well as for
|
||||
-``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPT=y`` kernels. The
|
||||
+``synchronize_rcu_expedited()``) in ``CONFIG_PREEMPTION=y`` kernels. The
|
||||
reason is that an RCU read-side critical section might be preempted,
|
||||
which means that a subsequent ``synchronize_rcu()`` really does have to
|
||||
wait for something, as opposed to simply returning immediately.
|
||||
@@ -2040,7 +2040,7 @@ The compiler must not be permitted to tr
|
||||
5 rcu_read_unlock();
|
||||
6 do_something_with(v, user_v);
|
||||
|
||||
-If the compiler did make this transformation in a ``CONFIG_PREEMPT=n`` kernel
|
||||
+If the compiler did make this transformation in a ``CONFIG_PREEMPTION=n`` kernel
|
||||
build, and if ``get_user()`` did page fault, the result would be a quiescent
|
||||
state in the middle of an RCU read-side critical section. This misplaced
|
||||
quiescent state could result in line 4 being a use-after-free access,
|
||||
@@ -2322,7 +2322,7 @@ conjunction with the `-rt
|
||||
patchset <https://wiki.linuxfoundation.org/realtime/>`__. The
|
||||
real-time-latency response requirements are such that the traditional
|
||||
approach of disabling preemption across RCU read-side critical sections
|
||||
-is inappropriate. Kernels built with ``CONFIG_PREEMPT=y`` therefore use
|
||||
+is inappropriate. Kernels built with ``CONFIG_PREEMPTION=y`` therefore use
|
||||
an RCU implementation that allows RCU read-side critical sections to be
|
||||
preempted. This requirement made its presence known after users made it
|
||||
clear that an earlier `real-time
|
||||
@@ -2444,7 +2444,7 @@ includes ``rcu_read_lock_bh()``, ``rcu_r
|
||||
``call_rcu_bh()``, ``rcu_barrier_bh()``, and
|
||||
``rcu_read_lock_bh_held()``. However, the update-side APIs are now
|
||||
simple wrappers for other RCU flavors, namely RCU-sched in
|
||||
-CONFIG_PREEMPT=n kernels and RCU-preempt otherwise.
|
||||
+CONFIG_PREEMPTION=n kernels and RCU-preempt otherwise.
|
||||
|
||||
Sched Flavor (Historical)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -2462,11 +2462,11 @@ not have this property, given that any p
|
||||
RCU read-side critical section can be a quiescent state. Therefore,
|
||||
*RCU-sched* was created, which follows “classic” RCU in that an
|
||||
RCU-sched grace period waits for pre-existing interrupt and NMI
|
||||
-handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and
|
||||
+handlers. In kernels built with ``CONFIG_PREEMPTION=n``, the RCU and
|
||||
RCU-sched APIs have identical implementations, while kernels built with
|
||||
-``CONFIG_PREEMPT=y`` provide a separate implementation for each.
|
||||
+``CONFIG_PREEMPTION=y`` provide a separate implementation for each.
|
||||
|
||||
-Note well that in ``CONFIG_PREEMPT=y`` kernels,
|
||||
+Note well that in ``CONFIG_PREEMPTION=y`` kernels,
|
||||
``rcu_read_lock_sched()`` and ``rcu_read_unlock_sched()`` disable and
|
||||
re-enable preemption, respectively. This means that if there was a
|
||||
preemption attempt during the RCU-sched read-side critical section,
|
||||
@@ -2629,10 +2629,10 @@ userspace execution also delimit tasks-R
|
||||
|
||||
The tasks-RCU API is quite compact, consisting only of
|
||||
``call_rcu_tasks()``, ``synchronize_rcu_tasks()``, and
|
||||
-``rcu_barrier_tasks()``. In ``CONFIG_PREEMPT=n`` kernels, trampolines
|
||||
+``rcu_barrier_tasks()``. In ``CONFIG_PREEMPTION=n`` kernels, trampolines
|
||||
cannot be preempted, so these APIs map to ``call_rcu()``,
|
||||
``synchronize_rcu()``, and ``rcu_barrier()``, respectively. In
|
||||
-``CONFIG_PREEMPT=y`` kernels, trampolines can be preempted, and these
|
||||
+``CONFIG_PREEMPTION=y`` kernels, trampolines can be preempted, and these
|
||||
three APIs are therefore implemented by separate functions that check
|
||||
for voluntary context switches.
|
||||
|
||||
--- a/Documentation/RCU/checklist.rst
|
||||
+++ b/Documentation/RCU/checklist.rst
|
||||
@@ -214,7 +214,7 @@ over a rather long period of time, but i
|
||||
the rest of the system.
|
||||
|
||||
7. As of v4.20, a given kernel implements only one RCU flavor,
|
||||
- which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y.
|
||||
+ which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y.
|
||||
If the updater uses call_rcu() or synchronize_rcu(),
|
||||
then the corresponding readers my use rcu_read_lock() and
|
||||
rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
|
||||
--- a/Documentation/RCU/rcubarrier.rst
|
||||
+++ b/Documentation/RCU/rcubarrier.rst
|
||||
@@ -9,7 +9,7 @@ RCU (read-copy update) is a synchronizat
|
||||
of as a replacement for read-writer locking (among other things), but with
|
||||
very low-overhead readers that are immune to deadlock, priority inversion,
|
||||
and unbounded latency. RCU read-side critical sections are delimited
|
||||
-by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
|
||||
+by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPTION
|
||||
kernels, generate no code whatsoever.
|
||||
|
||||
This means that RCU writers are unaware of the presence of concurrent
|
||||
@@ -329,10 +329,10 @@ Answer: This cannot happen. The reason i
|
||||
to smp_call_function() and further to smp_call_function_on_cpu(),
|
||||
causing this latter to spin until the cross-CPU invocation of
|
||||
rcu_barrier_func() has completed. This by itself would prevent
|
||||
- a grace period from completing on non-CONFIG_PREEMPT kernels,
|
||||
+ a grace period from completing on non-CONFIG_PREEMPTION kernels,
|
||||
since each CPU must undergo a context switch (or other quiescent
|
||||
state) before the grace period can complete. However, this is
|
||||
- of no use in CONFIG_PREEMPT kernels.
|
||||
+ of no use in CONFIG_PREEMPTION kernels.
|
||||
|
||||
Therefore, on_each_cpu() disables preemption across its call
|
||||
to smp_call_function() and also across the local call to
|
||||
--- a/Documentation/RCU/stallwarn.rst
|
||||
+++ b/Documentation/RCU/stallwarn.rst
|
||||
@@ -25,7 +25,7 @@ So your kernel printed an RCU CPU stall
|
||||
|
||||
- A CPU looping with bottom halves disabled.
|
||||
|
||||
-- For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
|
||||
+- For !CONFIG_PREEMPTION kernels, a CPU looping anywhere in the kernel
|
||||
without invoking schedule(). If the looping in the kernel is
|
||||
really expected and desirable behavior, you might need to add
|
||||
some calls to cond_resched().
|
||||
@@ -44,7 +44,7 @@ So your kernel printed an RCU CPU stall
|
||||
result in the ``rcu_.*kthread starved for`` console-log message,
|
||||
which will include additional debugging information.
|
||||
|
||||
-- A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
|
||||
+- A CPU-bound real-time task in a CONFIG_PREEMPTION kernel, which might
|
||||
happen to preempt a low-priority task in the middle of an RCU
|
||||
read-side critical section. This is especially damaging if
|
||||
that low-priority task is not permitted to run on any other CPU,
|
||||
--- a/Documentation/RCU/whatisRCU.rst
|
||||
+++ b/Documentation/RCU/whatisRCU.rst
|
||||
@@ -683,7 +683,7 @@ so there can be no deadlock cycle.
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
This section presents a "toy" RCU implementation that is based on
|
||||
"classic RCU". It is also short on performance (but only for updates) and
|
||||
-on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT
|
||||
+on features such as hotplug CPU and the ability to run in CONFIG_PREEMPTION
|
||||
kernels. The definitions of rcu_dereference() and rcu_assign_pointer()
|
||||
are the same as those shown in the preceding section, so they are omitted.
|
||||
::
|
||||
@@ -739,7 +739,7 @@ to that data item, so we can safely recl
|
||||
Quick Quiz #3:
|
||||
If it is illegal to block in an RCU read-side
|
||||
critical section, what the heck do you do in
|
||||
- PREEMPT_RT, where normal spinlocks can block???
|
||||
+ CONFIG_PREEMPT_RT, where normal spinlocks can block???
|
||||
|
||||
:ref:`Answers to Quick Quiz <8_whatisRCU>`
|
||||
|
||||
@@ -1093,7 +1093,7 @@ the right tool for your job.
|
||||
overhead is **negative**.
|
||||
|
||||
Answer:
|
||||
- Imagine a single-CPU system with a non-CONFIG_PREEMPT
|
||||
+ Imagine a single-CPU system with a non-CONFIG_PREEMPTION
|
||||
kernel where a routing table is used by process-context
|
||||
code, but can be updated by irq-context code (for example,
|
||||
by an "ICMP REDIRECT" packet). The usual way of handling
|
||||
@@ -1120,10 +1120,10 @@ the right tool for your job.
|
||||
Quick Quiz #3:
|
||||
If it is illegal to block in an RCU read-side
|
||||
critical section, what the heck do you do in
|
||||
- PREEMPT_RT, where normal spinlocks can block???
|
||||
+ CONFIG_PREEMPT_RT, where normal spinlocks can block???
|
||||
|
||||
Answer:
|
||||
- Just as PREEMPT_RT permits preemption of spinlock
|
||||
+ Just as CONFIG_PREEMPT_RT permits preemption of spinlock
|
||||
critical sections, it permits preemption of RCU
|
||||
read-side critical sections. It also permits
|
||||
spinlocks blocking while in RCU read-side critical
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,173 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 25 Jan 2021 20:45:09 +0100
|
||||
Subject: [PATCH 2/4] tracing: Inline tracing_gen_ctx_flags()
|
||||
|
||||
Inline tracing_gen_ctx_flags(). This allows having one ifdef
|
||||
CONFIG_TRACE_IRQFLAGS_SUPPORT.
|
||||
|
||||
This requires moving `trace_flag_type' so tracing_gen_ctx_flags() can
|
||||
use it.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20210125194511.3924915-3-bigeasy@linutronix.de
|
||||
|
||||
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
|
||||
Link: https://lkml.kernel.org/r/20210125140323.6b1ff20c@gandalf.local.home
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/trace_events.h | 54 ++++++++++++++++++++++++++++++++++++++++---
|
||||
kernel/trace/trace.c | 38 +-----------------------------
|
||||
kernel/trace/trace.h | 19 ---------------
|
||||
3 files changed, 53 insertions(+), 58 deletions(-)
|
||||
|
||||
--- a/include/linux/trace_events.h
|
||||
+++ b/include/linux/trace_events.h
|
||||
@@ -160,9 +160,57 @@ static inline void tracing_generic_entry
|
||||
entry->flags = trace_ctx >> 16;
|
||||
}
|
||||
|
||||
-unsigned int tracing_gen_ctx_flags(unsigned long irqflags);
|
||||
-unsigned int tracing_gen_ctx(void);
|
||||
-unsigned int tracing_gen_ctx_dec(void);
|
||||
+unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
|
||||
+
|
||||
+enum trace_flag_type {
|
||||
+ TRACE_FLAG_IRQS_OFF = 0x01,
|
||||
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
|
||||
+ TRACE_FLAG_NEED_RESCHED = 0x04,
|
||||
+ TRACE_FLAG_HARDIRQ = 0x08,
|
||||
+ TRACE_FLAG_SOFTIRQ = 0x10,
|
||||
+ TRACE_FLAG_PREEMPT_RESCHED = 0x20,
|
||||
+ TRACE_FLAG_NMI = 0x40,
|
||||
+};
|
||||
+
|
||||
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
|
||||
+static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
|
||||
+{
|
||||
+ unsigned int irq_status = irqs_disabled_flags(irqflags) ?
|
||||
+ TRACE_FLAG_IRQS_OFF : 0;
|
||||
+ return tracing_gen_ctx_irq_test(irq_status);
|
||||
+}
|
||||
+static inline unsigned int tracing_gen_ctx(void)
|
||||
+{
|
||||
+ unsigned long irqflags;
|
||||
+
|
||||
+ local_save_flags(irqflags);
|
||||
+ return tracing_gen_ctx_flags(irqflags);
|
||||
+}
|
||||
+#else
|
||||
+
|
||||
+static inline unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
|
||||
+{
|
||||
+ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
|
||||
+}
|
||||
+static inline unsigned int tracing_gen_ctx(void)
|
||||
+{
|
||||
+ return tracing_gen_ctx_irq_test(TRACE_FLAG_IRQS_NOSUPPORT);
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static inline unsigned int tracing_gen_ctx_dec(void)
|
||||
+{
|
||||
+ unsigned int trace_ctx;
|
||||
+
|
||||
+ trace_ctx = tracing_gen_ctx();
|
||||
+ /*
|
||||
+ * Subtract one from the preemption counter if preemption is enabled,
|
||||
+ * see trace_event_buffer_reserve() for details.
|
||||
+ */
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
+ trace_ctx--;
|
||||
+ return trace_ctx;
|
||||
+}
|
||||
|
||||
struct trace_event_file;
|
||||
|
||||
--- a/kernel/trace/trace.c
|
||||
+++ b/kernel/trace/trace.c
|
||||
@@ -2578,20 +2578,13 @@ enum print_line_t trace_handle_return(st
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_handle_return);
|
||||
|
||||
-unsigned int tracing_gen_ctx_flags(unsigned long irqflags)
|
||||
+unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
|
||||
{
|
||||
- unsigned int trace_flags = 0;
|
||||
+ unsigned int trace_flags = irqs_status;
|
||||
unsigned int pc;
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
|
||||
- if (irqs_disabled_flags(irqflags))
|
||||
- trace_flags |= TRACE_FLAG_IRQS_OFF;
|
||||
-#else
|
||||
- trace_flags |= TRACE_FLAG_IRQS_NOSUPPORT;
|
||||
-#endif
|
||||
-
|
||||
if (pc & NMI_MASK)
|
||||
trace_flags |= TRACE_FLAG_NMI;
|
||||
if (pc & HARDIRQ_MASK)
|
||||
@@ -2607,33 +2600,6 @@ unsigned int tracing_gen_ctx_flags(unsig
|
||||
return (trace_flags << 16) | (pc & 0xff);
|
||||
}
|
||||
|
||||
-unsigned int tracing_gen_ctx(void)
|
||||
-{
|
||||
- unsigned long irqflags;
|
||||
-
|
||||
-#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
|
||||
- local_save_flags(irqflags);
|
||||
-#else
|
||||
- irqflags = 0;
|
||||
-#endif
|
||||
- return tracing_gen_ctx_flags(irqflags);
|
||||
-}
|
||||
-
|
||||
-unsigned int tracing_gen_ctx_dec(void)
|
||||
-{
|
||||
- unsigned int trace_ctx;
|
||||
-
|
||||
- trace_ctx = tracing_gen_ctx();
|
||||
-
|
||||
- /*
|
||||
- * Subtract one from the preeption counter if preemption is enabled,
|
||||
- * see trace_event_buffer_reserve()for details.
|
||||
- */
|
||||
- if (IS_ENABLED(CONFIG_PREEMPTION))
|
||||
- trace_ctx--;
|
||||
- return trace_ctx;
|
||||
-}
|
||||
-
|
||||
struct ring_buffer_event *
|
||||
trace_buffer_lock_reserve(struct trace_buffer *buffer,
|
||||
int type,
|
||||
--- a/kernel/trace/trace.h
|
||||
+++ b/kernel/trace/trace.h
|
||||
@@ -136,25 +136,6 @@ struct kretprobe_trace_entry_head {
|
||||
unsigned long ret_ip;
|
||||
};
|
||||
|
||||
-/*
|
||||
- * trace_flag_type is an enumeration that holds different
|
||||
- * states when a trace occurs. These are:
|
||||
- * IRQS_OFF - interrupts were disabled
|
||||
- * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
|
||||
- * NEED_RESCHED - reschedule is requested
|
||||
- * HARDIRQ - inside an interrupt handler
|
||||
- * SOFTIRQ - inside a softirq handler
|
||||
- */
|
||||
-enum trace_flag_type {
|
||||
- TRACE_FLAG_IRQS_OFF = 0x01,
|
||||
- TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
|
||||
- TRACE_FLAG_NEED_RESCHED = 0x04,
|
||||
- TRACE_FLAG_HARDIRQ = 0x08,
|
||||
- TRACE_FLAG_SOFTIRQ = 0x10,
|
||||
- TRACE_FLAG_PREEMPT_RESCHED = 0x20,
|
||||
- TRACE_FLAG_NMI = 0x40,
|
||||
-};
|
||||
-
|
||||
#define TRACE_BUF_SIZE 1024
|
||||
|
||||
struct trace_array;
|
||||
@@ -0,0 +1,41 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 25 Jan 2021 20:45:10 +0100
|
||||
Subject: [PATCH 3/4] tracing: Use in_serving_softirq() to deduct softirq
|
||||
status.
|
||||
|
||||
PREEMPT_RT does not report "serving softirq" because the tracing core
|
||||
looks at the preemption counter while PREEMPT_RT does not update it
|
||||
while processing softirqs in order to remain preemptible. The
|
||||
information is stored somewhere else.
|
||||
The in_serving_softirq() macro and the SOFTIRQ_OFFSET define are still
|
||||
working but not on the preempt-counter.
|
||||
|
||||
Use in_serving_softirq() macro which works on PREEMPT_RT. On !PREEMPT_RT
|
||||
the compiler (gcc-10 / clang-11) is smart enough to optimize the
|
||||
in_serving_softirq() related read of the preemption counter away.
|
||||
The only difference I noticed by using in_serving_softirq() on
|
||||
!PREEMPT_RT is that gcc-10 implemented tracing_gen_ctx_flags() as
|
||||
reading FLAG, jmp _tracing_gen_ctx_flags(). Without in_serving_softirq()
|
||||
it inlined _tracing_gen_ctx_flags() into tracing_gen_ctx_flags().
|
||||
|
||||
Link: https://lkml.kernel.org/r/20210125194511.3924915-4-bigeasy@linutronix.de
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/trace/trace.c | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/trace/trace.c
|
||||
+++ b/kernel/trace/trace.c
|
||||
@@ -2589,8 +2589,7 @@ unsigned int tracing_gen_ctx_irq_test(un
|
||||
trace_flags |= TRACE_FLAG_NMI;
|
||||
if (pc & HARDIRQ_MASK)
|
||||
trace_flags |= TRACE_FLAG_HARDIRQ;
|
||||
-
|
||||
- if (pc & SOFTIRQ_OFFSET)
|
||||
+ if (in_serving_softirq())
|
||||
trace_flags |= TRACE_FLAG_SOFTIRQ;
|
||||
|
||||
if (tif_need_resched())
|
||||
@@ -0,0 +1,36 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 25 Jan 2021 20:45:11 +0100
|
||||
Subject: [PATCH 4/4] tracing: Remove NULL check from current in
|
||||
tracing_generic_entry_update().
|
||||
|
||||
I can't imagine when or why `current' would return a NULL pointer. This
|
||||
check was added in commit
|
||||
72829bc3d63cd ("ftrace: move enums to ftrace.h and make helper function global")
|
||||
|
||||
but it doesn't give me a hint why it was needed.
|
||||
|
||||
Assume `current' never returns a NULL pointer and remove the check.
|
||||
|
||||
Link: https://lkml.kernel.org/r/20210125194511.3924915-5-bigeasy@linutronix.de
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/trace_events.h | 4 +---
|
||||
1 file changed, 1 insertion(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/trace_events.h
|
||||
+++ b/include/linux/trace_events.h
|
||||
@@ -152,10 +152,8 @@ static inline void tracing_generic_entry
|
||||
unsigned short type,
|
||||
unsigned int trace_ctx)
|
||||
{
|
||||
- struct task_struct *tsk = current;
|
||||
-
|
||||
entry->preempt_count = trace_ctx & 0xff;
|
||||
- entry->pid = (tsk) ? tsk->pid : 0;
|
||||
+ entry->pid = current->pid;
|
||||
entry->type = type;
|
||||
entry->flags = trace_ctx >> 16;
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 19 Feb 2021 17:51:07 +0100
|
||||
Subject: [PATCH] powerpc/mm: Move the linear_mapping_mutex to the ifdef where
|
||||
it is used
|
||||
|
||||
The mutex linear_mapping_mutex is defined at the top of the file while its
|
||||
only two users are within the CONFIG_MEMORY_HOTPLUG block.
|
||||
A compile without CONFIG_MEMORY_HOTPLUG set fails on PREEMPT_RT because
|
||||
its mutex implementation is smart enough to realize that it is unused.
|
||||
|
||||
Move the definition of linear_mapping_mutex to the ifdef block where it is
|
||||
used.
|
||||
|
||||
Fixes: 1f73ad3e8d755 ("powerpc/mm: print warning in arch_remove_linear_mapping()")
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/powerpc/mm/mem.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/arch/powerpc/mm/mem.c
|
||||
+++ b/arch/powerpc/mm/mem.c
|
||||
@@ -54,7 +54,6 @@
|
||||
|
||||
#include <mm/mmu_decl.h>
|
||||
|
||||
-static DEFINE_MUTEX(linear_mapping_mutex);
|
||||
unsigned long long memory_limit;
|
||||
bool init_mem_is_free;
|
||||
|
||||
@@ -72,6 +71,7 @@ pgprot_t phys_mem_access_prot(struct fil
|
||||
EXPORT_SYMBOL(phys_mem_access_prot);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
+static DEFINE_MUTEX(linear_mapping_mutex);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
int memory_add_physaddr_to_nid(u64 start)
|
||||
@@ -0,0 +1,49 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Wed, 17 Feb 2021 16:15:31 +0100
|
||||
Subject: [PATCH 02/28] printk: limit second loop of syslog_print_all
|
||||
|
||||
The second loop of syslog_print_all() subtracts lengths that were
|
||||
added in the first loop. With commit b031a684bfd0 ("printk: remove
|
||||
logbuf_lock writer-protection of ringbuffer") it is possible that
|
||||
records are (over)written during syslog_print_all(). This allows the
|
||||
possibility of the second loop subtracting lengths that were never
|
||||
added in the first loop.
|
||||
|
||||
This situation can result in syslog_print_all() filling the buffer
|
||||
starting from a later record, even though there may have been room
|
||||
to fit the earlier record(s) as well.
|
||||
|
||||
Fixes: b031a684bfd0 ("printk: remove logbuf_lock writer-protection of ringbuffer")
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Reviewed-by: Petr Mladek <pmladek@suse.com>
|
||||
---
|
||||
kernel/printk/printk.c | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1494,6 +1494,7 @@ static int syslog_print_all(char __user
|
||||
struct printk_info info;
|
||||
unsigned int line_count;
|
||||
struct printk_record r;
|
||||
+ u64 max_seq;
|
||||
char *text;
|
||||
int len = 0;
|
||||
u64 seq;
|
||||
@@ -1512,9 +1513,15 @@ static int syslog_print_all(char __user
|
||||
prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
|
||||
len += get_record_print_text_size(&info, line_count, true, time);
|
||||
|
||||
+ /*
|
||||
+ * Set an upper bound for the next loop to avoid subtracting lengths
|
||||
+ * that were never added.
|
||||
+ */
|
||||
+ max_seq = seq;
|
||||
+
|
||||
/* move first record forward until length fits into the buffer */
|
||||
prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
|
||||
- if (len <= size)
|
||||
+ if (len <= size || info.seq >= max_seq)
|
||||
break;
|
||||
len -= get_record_print_text_size(&info, line_count, true, time);
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 21 Dec 2020 11:19:39 +0106
|
||||
Subject: [PATCH 03/28] printk: kmsg_dump: remove unused fields
|
||||
|
||||
struct kmsg_dumper still contains some fields that were used to
|
||||
iterate the old ringbuffer. They are no longer used. Remove them
|
||||
and update the struct documentation.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Reviewed-by: Petr Mladek <pmladek@suse.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/kmsg_dump.h | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/kmsg_dump.h
|
||||
+++ b/include/linux/kmsg_dump.h
|
||||
@@ -36,6 +36,9 @@ enum kmsg_dump_reason {
|
||||
* through the record iterator
|
||||
* @max_reason: filter for highest reason number that should be dumped
|
||||
* @registered: Flag that specifies if this is already registered
|
||||
+ * @active: Flag that specifies if this is currently dumping
|
||||
+ * @cur_seq: Points to the oldest message to dump (private)
|
||||
+ * @next_seq: Points after the newest message to dump (private)
|
||||
*/
|
||||
struct kmsg_dumper {
|
||||
struct list_head list;
|
||||
@@ -45,8 +48,6 @@ struct kmsg_dumper {
|
||||
bool registered;
|
||||
|
||||
/* private state of the kmsg iterator */
|
||||
- u32 cur_idx;
|
||||
- u32 next_idx;
|
||||
u64 cur_seq;
|
||||
u64 next_seq;
|
||||
};
|
||||
@@ -0,0 +1,136 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:41:56 +0106
|
||||
Subject: [PATCH 04/28] printk: refactor kmsg_dump_get_buffer()
|
||||
|
||||
kmsg_dump_get_buffer() requires nearly the same logic as
|
||||
syslog_print_all(), but uses different variable names and
|
||||
does not make use of the ringbuffer loop macros. Modify
|
||||
kmsg_dump_get_buffer() so that the implementation is as similar
|
||||
to syslog_print_all() as possible.
|
||||
|
||||
A follow-up commit will move this common logic into a
|
||||
separate helper function.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Reviewed-by: Petr Mladek <pmladek@suse.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/kmsg_dump.h | 2 -
|
||||
kernel/printk/printk.c | 60 ++++++++++++++++++++++++----------------------
|
||||
2 files changed, 33 insertions(+), 29 deletions(-)
|
||||
|
||||
--- a/include/linux/kmsg_dump.h
|
||||
+++ b/include/linux/kmsg_dump.h
|
||||
@@ -62,7 +62,7 @@ bool kmsg_dump_get_line(struct kmsg_dump
|
||||
char *line, size_t size, size_t *len);
|
||||
|
||||
bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
- char *buf, size_t size, size_t *len);
|
||||
+ char *buf, size_t size, size_t *len_out);
|
||||
|
||||
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3424,7 +3424,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
* read.
|
||||
*/
|
||||
bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
- char *buf, size_t size, size_t *len)
|
||||
+ char *buf, size_t size, size_t *len_out)
|
||||
{
|
||||
struct printk_info info;
|
||||
unsigned int line_count;
|
||||
@@ -3432,12 +3432,10 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
unsigned long flags;
|
||||
u64 seq;
|
||||
u64 next_seq;
|
||||
- size_t l = 0;
|
||||
+ size_t len = 0;
|
||||
bool ret = false;
|
||||
bool time = printk_time;
|
||||
|
||||
- prb_rec_init_rd(&r, &info, buf, size);
|
||||
-
|
||||
if (!dumper->active || !buf || !size)
|
||||
goto out;
|
||||
|
||||
@@ -3455,48 +3453,54 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
goto out;
|
||||
}
|
||||
|
||||
- /* calculate length of entire buffer */
|
||||
- seq = dumper->cur_seq;
|
||||
- while (prb_read_valid_info(prb, seq, &info, &line_count)) {
|
||||
- if (r.info->seq >= dumper->next_seq)
|
||||
+ /*
|
||||
+ * Find first record that fits, including all following records,
|
||||
+ * into the user-provided buffer for this dump.
|
||||
+ */
|
||||
+
|
||||
+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
|
||||
+ if (info.seq >= dumper->next_seq)
|
||||
break;
|
||||
- l += get_record_print_text_size(&info, line_count, syslog, time);
|
||||
- seq = r.info->seq + 1;
|
||||
+ len += get_record_print_text_size(&info, line_count, syslog, time);
|
||||
}
|
||||
|
||||
- /* move first record forward until length fits into the buffer */
|
||||
- seq = dumper->cur_seq;
|
||||
- while (l >= size && prb_read_valid_info(prb, seq,
|
||||
- &info, &line_count)) {
|
||||
- if (r.info->seq >= dumper->next_seq)
|
||||
+ /*
|
||||
+ * Move first record forward until length fits into the buffer. Ignore
|
||||
+ * newest messages that were not counted in the above cycle. Messages
|
||||
+ * might appear and get lost in the meantime. This is the best effort
|
||||
+ * that prevents an infinite loop.
|
||||
+ */
|
||||
+ prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
|
||||
+ if (len < size || info.seq >= dumper->next_seq)
|
||||
break;
|
||||
- l -= get_record_print_text_size(&info, line_count, syslog, time);
|
||||
- seq = r.info->seq + 1;
|
||||
+ len -= get_record_print_text_size(&info, line_count, syslog, time);
|
||||
}
|
||||
|
||||
- /* last message in next interation */
|
||||
+ /*
|
||||
+ * Next kmsg_dump_get_buffer() invocation will dump block of
|
||||
+ * older records stored right before this one.
|
||||
+ */
|
||||
next_seq = seq;
|
||||
|
||||
- /* actually read text into the buffer now */
|
||||
- l = 0;
|
||||
- while (prb_read_valid(prb, seq, &r)) {
|
||||
+ prb_rec_init_rd(&r, &info, buf, size);
|
||||
+
|
||||
+ len = 0;
|
||||
+ prb_for_each_record(seq, prb, seq, &r) {
|
||||
if (r.info->seq >= dumper->next_seq)
|
||||
break;
|
||||
|
||||
- l += record_print_text(&r, syslog, time);
|
||||
-
|
||||
- /* adjust record to store to remaining buffer space */
|
||||
- prb_rec_init_rd(&r, &info, buf + l, size - l);
|
||||
+ len += record_print_text(&r, syslog, time);
|
||||
|
||||
- seq = r.info->seq + 1;
|
||||
+ /* Adjust record to store to remaining buffer space. */
|
||||
+ prb_rec_init_rd(&r, &info, buf + len, size - len);
|
||||
}
|
||||
|
||||
dumper->next_seq = next_seq;
|
||||
ret = true;
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
out:
|
||||
- if (len)
|
||||
- *len = l;
|
||||
+ if (len_out)
|
||||
+ *len_out = len;
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
@@ -0,0 +1,140 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Wed, 13 Jan 2021 11:29:53 +0106
|
||||
Subject: [PATCH 05/28] printk: consolidate
|
||||
kmsg_dump_get_buffer/syslog_print_all code
|
||||
|
||||
The logic for finding records to fit into a buffer is the same for
|
||||
kmsg_dump_get_buffer() and syslog_print_all(). Introduce a helper
|
||||
function find_first_fitting_seq() to handle this logic.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 87 ++++++++++++++++++++++++++++---------------------
|
||||
1 file changed, 50 insertions(+), 37 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1421,6 +1421,50 @@ static size_t get_record_print_text_size
|
||||
return ((prefix_len * line_count) + info->text_len + 1);
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Beginning with @start_seq, find the first record where it and all following
|
||||
+ * records up to (but not including) @max_seq fit into @size.
|
||||
+ *
|
||||
+ * @max_seq is simply an upper bound and does not need to exist. If the caller
|
||||
+ * does not require an upper bound, -1 can be used for @max_seq.
|
||||
+ */
|
||||
+static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size,
|
||||
+ bool syslog, bool time)
|
||||
+{
|
||||
+ struct printk_info info;
|
||||
+ unsigned int line_count;
|
||||
+ size_t len = 0;
|
||||
+ u64 seq;
|
||||
+
|
||||
+ /* Determine the size of the records up to @max_seq. */
|
||||
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
|
||||
+ if (info.seq >= max_seq)
|
||||
+ break;
|
||||
+ len += get_record_print_text_size(&info, line_count, syslog, time);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Adjust the upper bound for the next loop to avoid subtracting
|
||||
+ * lengths that were never added.
|
||||
+ */
|
||||
+ if (seq < max_seq)
|
||||
+ max_seq = seq;
|
||||
+
|
||||
+ /*
|
||||
+ * Move first record forward until length fits into the buffer. Ignore
|
||||
+ * newest messages that were not counted in the above cycle. Messages
|
||||
+ * might appear and get lost in the meantime. This is a best effort
|
||||
+ * that prevents an infinite loop that could occur with a retry.
|
||||
+ */
|
||||
+ prb_for_each_info(start_seq, prb, seq, &info, &line_count) {
|
||||
+ if (len <= size || info.seq >= max_seq)
|
||||
+ break;
|
||||
+ len -= get_record_print_text_size(&info, line_count, syslog, time);
|
||||
+ }
|
||||
+
|
||||
+ return seq;
|
||||
+}
|
||||
+
|
||||
static int syslog_print(char __user *buf, int size)
|
||||
{
|
||||
struct printk_info info;
|
||||
@@ -1492,9 +1536,7 @@ static int syslog_print(char __user *buf
|
||||
static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
{
|
||||
struct printk_info info;
|
||||
- unsigned int line_count;
|
||||
struct printk_record r;
|
||||
- u64 max_seq;
|
||||
char *text;
|
||||
int len = 0;
|
||||
u64 seq;
|
||||
@@ -1510,21 +1552,7 @@ static int syslog_print_all(char __user
|
||||
* Find first record that fits, including all following records,
|
||||
* into the user-provided buffer for this dump.
|
||||
*/
|
||||
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count)
|
||||
- len += get_record_print_text_size(&info, line_count, true, time);
|
||||
-
|
||||
- /*
|
||||
- * Set an upper bound for the next loop to avoid subtracting lengths
|
||||
- * that were never added.
|
||||
- */
|
||||
- max_seq = seq;
|
||||
-
|
||||
- /* move first record forward until length fits into the buffer */
|
||||
- prb_for_each_info(clear_seq, prb, seq, &info, &line_count) {
|
||||
- if (len <= size || info.seq >= max_seq)
|
||||
- break;
|
||||
- len -= get_record_print_text_size(&info, line_count, true, time);
|
||||
- }
|
||||
+ seq = find_first_fitting_seq(clear_seq, -1, size, true, time);
|
||||
|
||||
prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
|
||||
|
||||
@@ -3427,7 +3455,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
char *buf, size_t size, size_t *len_out)
|
||||
{
|
||||
struct printk_info info;
|
||||
- unsigned int line_count;
|
||||
struct printk_record r;
|
||||
unsigned long flags;
|
||||
u64 seq;
|
||||
@@ -3455,26 +3482,12 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
|
||||
/*
|
||||
* Find first record that fits, including all following records,
|
||||
- * into the user-provided buffer for this dump.
|
||||
+ * into the user-provided buffer for this dump. Pass in size-1
|
||||
+ * because this function (by way of record_print_text()) will
|
||||
+ * not write more than size-1 bytes of text into @buf.
|
||||
*/
|
||||
-
|
||||
- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
|
||||
- if (info.seq >= dumper->next_seq)
|
||||
- break;
|
||||
- len += get_record_print_text_size(&info, line_count, syslog, time);
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Move first record forward until length fits into the buffer. Ignore
|
||||
- * newest messages that were not counted in the above cycle. Messages
|
||||
- * might appear and get lost in the meantime. This is the best effort
|
||||
- * that prevents an infinite loop.
|
||||
- */
|
||||
- prb_for_each_info(dumper->cur_seq, prb, seq, &info, &line_count) {
|
||||
- if (len < size || info.seq >= dumper->next_seq)
|
||||
- break;
|
||||
- len -= get_record_print_text_size(&info, line_count, syslog, time);
|
||||
- }
|
||||
+ seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq,
|
||||
+ size - 1, syslog, time);
|
||||
|
||||
/*
|
||||
* Next kmsg_dump_get_buffer() invocation will dump block of
|
||||
@@ -0,0 +1,88 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Thu, 10 Dec 2020 12:48:01 +0106
|
||||
Subject: [PATCH 06/28] printk: introduce CONSOLE_LOG_MAX for improved
|
||||
multi-line support
|
||||
|
||||
Instead of using "LOG_LINE_MAX + PREFIX_MAX" for temporary buffer
|
||||
sizes, introduce CONSOLE_LOG_MAX. This represents the maximum size
|
||||
that is allowed to be printed to the console for a single record.
|
||||
|
||||
Rather than setting CONSOLE_LOG_MAX to "LOG_LINE_MAX + PREFIX_MAX"
|
||||
(1024), increase it to 4096. With a larger buffer size, multi-line
|
||||
records that are nearly LOG_LINE_MAX in length will have a better
|
||||
chance of being fully printed. (When formatting a record for the
|
||||
console, each line of a multi-line record is prepended with a copy
|
||||
of the prefix.)
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 18 +++++++++++-------
|
||||
1 file changed, 11 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -410,8 +410,13 @@ static u64 clear_seq;
|
||||
#else
|
||||
#define PREFIX_MAX 32
|
||||
#endif
|
||||
+
|
||||
+/* the maximum size allowed to be reserved for a record */
|
||||
#define LOG_LINE_MAX (1024 - PREFIX_MAX)
|
||||
|
||||
+/* the maximum size of a formatted record (i.e. with prefix added per line) */
|
||||
+#define CONSOLE_LOG_MAX 4096
|
||||
+
|
||||
#define LOG_LEVEL(v) ((v) & 0x07)
|
||||
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
|
||||
|
||||
@@ -1472,11 +1477,11 @@ static int syslog_print(char __user *buf
|
||||
char *text;
|
||||
int len = 0;
|
||||
|
||||
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
|
||||
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
|
||||
if (!text)
|
||||
return -ENOMEM;
|
||||
|
||||
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
|
||||
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
|
||||
|
||||
while (size > 0) {
|
||||
size_t n;
|
||||
@@ -1542,7 +1547,7 @@ static int syslog_print_all(char __user
|
||||
u64 seq;
|
||||
bool time;
|
||||
|
||||
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
|
||||
+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
|
||||
if (!text)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -1554,7 +1559,7 @@ static int syslog_print_all(char __user
|
||||
*/
|
||||
seq = find_first_fitting_seq(clear_seq, -1, size, true, time);
|
||||
|
||||
- prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
|
||||
+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
|
||||
|
||||
len = 0;
|
||||
prb_for_each_record(seq, prb, seq, &r) {
|
||||
@@ -2187,8 +2192,7 @@ EXPORT_SYMBOL(printk);
|
||||
|
||||
#else /* CONFIG_PRINTK */
|
||||
|
||||
-#define LOG_LINE_MAX 0
|
||||
-#define PREFIX_MAX 0
|
||||
+#define CONSOLE_LOG_MAX 0
|
||||
#define printk_time false
|
||||
|
||||
#define prb_read_valid(rb, seq, r) false
|
||||
@@ -2506,7 +2510,7 @@ static inline int can_use_console(void)
|
||||
void console_unlock(void)
|
||||
{
|
||||
static char ext_text[CONSOLE_EXT_LOG_MAX];
|
||||
- static char text[LOG_LINE_MAX + PREFIX_MAX];
|
||||
+ static char text[CONSOLE_LOG_MAX];
|
||||
unsigned long flags;
|
||||
bool do_cond_resched, retry;
|
||||
struct printk_info info;
|
||||
@@ -0,0 +1,140 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:41:58 +0106
|
||||
Subject: [PATCH 07/28] printk: use seqcount_latch for clear_seq
|
||||
|
||||
kmsg_dump_rewind_nolock() locklessly reads @clear_seq. However,
|
||||
this is not done atomically. Since @clear_seq is 64-bit, this
|
||||
cannot be an atomic operation for all platforms. Therefore, use
|
||||
a seqcount_latch to allow readers to always read a consistent
|
||||
value.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Reviewed-by: Petr Mladek <pmladek@suse.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 58 ++++++++++++++++++++++++++++++++++++++++++-------
|
||||
1 file changed, 50 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -402,8 +402,21 @@ static u64 console_seq;
|
||||
static u64 exclusive_console_stop_seq;
|
||||
static unsigned long console_dropped;
|
||||
|
||||
-/* the next printk record to read after the last 'clear' command */
|
||||
-static u64 clear_seq;
|
||||
+struct latched_seq {
|
||||
+ seqcount_latch_t latch;
|
||||
+ u64 val[2];
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * The next printk record to read after the last 'clear' command. There are
|
||||
+ * two copies (updated with seqcount_latch) so that reads can locklessly
|
||||
+ * access a valid value. Writers are synchronized by @logbuf_lock.
|
||||
+ */
|
||||
+static struct latched_seq clear_seq = {
|
||||
+ .latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
|
||||
+ .val[0] = 0,
|
||||
+ .val[1] = 0,
|
||||
+};
|
||||
|
||||
#ifdef CONFIG_PRINTK_CALLER
|
||||
#define PREFIX_MAX 48
|
||||
@@ -457,6 +470,31 @@ bool printk_percpu_data_ready(void)
|
||||
return __printk_percpu_data_ready;
|
||||
}
|
||||
|
||||
+/* Must be called under logbuf_lock. */
|
||||
+static void latched_seq_write(struct latched_seq *ls, u64 val)
|
||||
+{
|
||||
+ raw_write_seqcount_latch(&ls->latch);
|
||||
+ ls->val[0] = val;
|
||||
+ raw_write_seqcount_latch(&ls->latch);
|
||||
+ ls->val[1] = val;
|
||||
+}
|
||||
+
|
||||
+/* Can be called from any context. */
|
||||
+static u64 latched_seq_read_nolock(struct latched_seq *ls)
|
||||
+{
|
||||
+ unsigned int seq;
|
||||
+ unsigned int idx;
|
||||
+ u64 val;
|
||||
+
|
||||
+ do {
|
||||
+ seq = raw_read_seqcount_latch(&ls->latch);
|
||||
+ idx = seq & 0x1;
|
||||
+ val = ls->val[idx];
|
||||
+ } while (read_seqcount_latch_retry(&ls->latch, seq));
|
||||
+
|
||||
+ return val;
|
||||
+}
|
||||
+
|
||||
/* Return log buffer address */
|
||||
char *log_buf_addr_get(void)
|
||||
{
|
||||
@@ -801,7 +839,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
|
||||
* changes no global state, and does not clear anything.
|
||||
*/
|
||||
- user->seq = clear_seq;
|
||||
+ user->seq = latched_seq_read_nolock(&clear_seq);
|
||||
break;
|
||||
case SEEK_END:
|
||||
/* after the last record */
|
||||
@@ -960,6 +998,9 @@ void log_buf_vmcoreinfo_setup(void)
|
||||
|
||||
VMCOREINFO_SIZE(atomic_long_t);
|
||||
VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter);
|
||||
+
|
||||
+ VMCOREINFO_STRUCT_SIZE(latched_seq);
|
||||
+ VMCOREINFO_OFFSET(latched_seq, val);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1557,7 +1598,8 @@ static int syslog_print_all(char __user
|
||||
* Find first record that fits, including all following records,
|
||||
* into the user-provided buffer for this dump.
|
||||
*/
|
||||
- seq = find_first_fitting_seq(clear_seq, -1, size, true, time);
|
||||
+ seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
|
||||
+ size, true, time);
|
||||
|
||||
prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
|
||||
|
||||
@@ -1584,7 +1626,7 @@ static int syslog_print_all(char __user
|
||||
}
|
||||
|
||||
if (clear)
|
||||
- clear_seq = seq;
|
||||
+ latched_seq_write(&clear_seq, seq);
|
||||
logbuf_unlock_irq();
|
||||
|
||||
kfree(text);
|
||||
@@ -1594,7 +1636,7 @@ static int syslog_print_all(char __user
|
||||
static void syslog_clear(void)
|
||||
{
|
||||
logbuf_lock_irq();
|
||||
- clear_seq = prb_next_seq(prb);
|
||||
+ latched_seq_write(&clear_seq, prb_next_seq(prb));
|
||||
logbuf_unlock_irq();
|
||||
}
|
||||
|
||||
@@ -3336,7 +3378,7 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
dumper->active = true;
|
||||
|
||||
logbuf_lock_irqsave(flags);
|
||||
- dumper->cur_seq = clear_seq;
|
||||
+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
dumper->next_seq = prb_next_seq(prb);
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
|
||||
@@ -3534,7 +3576,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
*/
|
||||
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
|
||||
{
|
||||
- dumper->cur_seq = clear_seq;
|
||||
+ dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
dumper->next_seq = prb_next_seq(prb);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Thu, 10 Dec 2020 15:33:40 +0106
|
||||
Subject: [PATCH 08/28] printk: use atomic64_t for devkmsg_user.seq
|
||||
|
||||
@user->seq is indirectly protected by @logbuf_lock. Once @logbuf_lock
|
||||
is removed, @user->seq will no longer be safe from an atomicity point
|
||||
of view.
|
||||
|
||||
In preparation for the removal of @logbuf_lock, change it to
|
||||
atomic64_t to provide this safety.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -662,7 +662,7 @@ static ssize_t msg_print_ext_body(char *
|
||||
|
||||
/* /dev/kmsg - userspace message inject/listen interface */
|
||||
struct devkmsg_user {
|
||||
- u64 seq;
|
||||
+ atomic64_t seq;
|
||||
struct ratelimit_state rs;
|
||||
struct mutex lock;
|
||||
char buf[CONSOLE_EXT_LOG_MAX];
|
||||
@@ -763,7 +763,7 @@ static ssize_t devkmsg_read(struct file
|
||||
return ret;
|
||||
|
||||
logbuf_lock_irq();
|
||||
- if (!prb_read_valid(prb, user->seq, r)) {
|
||||
+ if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
|
||||
if (file->f_flags & O_NONBLOCK) {
|
||||
ret = -EAGAIN;
|
||||
logbuf_unlock_irq();
|
||||
@@ -772,15 +772,15 @@ static ssize_t devkmsg_read(struct file
|
||||
|
||||
logbuf_unlock_irq();
|
||||
ret = wait_event_interruptible(log_wait,
|
||||
- prb_read_valid(prb, user->seq, r));
|
||||
+ prb_read_valid(prb, atomic64_read(&user->seq), r));
|
||||
if (ret)
|
||||
goto out;
|
||||
logbuf_lock_irq();
|
||||
}
|
||||
|
||||
- if (r->info->seq != user->seq) {
|
||||
+ if (r->info->seq != atomic64_read(&user->seq)) {
|
||||
/* our last seen message is gone, return error and reset */
|
||||
- user->seq = r->info->seq;
|
||||
+ atomic64_set(&user->seq, r->info->seq);
|
||||
ret = -EPIPE;
|
||||
logbuf_unlock_irq();
|
||||
goto out;
|
||||
@@ -791,7 +791,7 @@ static ssize_t devkmsg_read(struct file
|
||||
&r->text_buf[0], r->info->text_len,
|
||||
&r->info->dev_info);
|
||||
|
||||
- user->seq = r->info->seq + 1;
|
||||
+ atomic64_set(&user->seq, r->info->seq + 1);
|
||||
logbuf_unlock_irq();
|
||||
|
||||
if (len > count) {
|
||||
@@ -831,7 +831,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
/* the first record */
|
||||
- user->seq = prb_first_valid_seq(prb);
|
||||
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
|
||||
break;
|
||||
case SEEK_DATA:
|
||||
/*
|
||||
@@ -839,11 +839,11 @@ static loff_t devkmsg_llseek(struct file
|
||||
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
|
||||
* changes no global state, and does not clear anything.
|
||||
*/
|
||||
- user->seq = latched_seq_read_nolock(&clear_seq);
|
||||
+ atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
|
||||
break;
|
||||
case SEEK_END:
|
||||
/* after the last record */
|
||||
- user->seq = prb_next_seq(prb);
|
||||
+ atomic64_set(&user->seq, prb_next_seq(prb));
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
@@ -866,7 +866,7 @@ static __poll_t devkmsg_poll(struct file
|
||||
logbuf_lock_irq();
|
||||
if (prb_read_valid_info(prb, user->seq, &info, NULL)) {
|
||||
/* return error when data has vanished underneath us */
|
||||
- if (info.seq != user->seq)
|
||||
+ if (info.seq != atomic64_read(&user->seq))
|
||||
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
|
||||
else
|
||||
ret = EPOLLIN|EPOLLRDNORM;
|
||||
@@ -905,7 +905,7 @@ static int devkmsg_open(struct inode *in
|
||||
&user->text_buf[0], sizeof(user->text_buf));
|
||||
|
||||
logbuf_lock_irq();
|
||||
- user->seq = prb_first_valid_seq(prb);
|
||||
+ atomic64_set(&user->seq, prb_first_valid_seq(prb));
|
||||
logbuf_unlock_irq();
|
||||
|
||||
file->private_data = user;
|
||||
152
kernel/patches-5.11.x-rt/0028-0009-printk-add-syslog_lock.patch
Normal file
152
kernel/patches-5.11.x-rt/0028-0009-printk-add-syslog_lock.patch
Normal file
@@ -0,0 +1,152 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Thu, 10 Dec 2020 16:58:02 +0106
|
||||
Subject: [PATCH 09/28] printk: add syslog_lock
|
||||
|
||||
The global variables @syslog_seq, @syslog_partial, @syslog_time
|
||||
and write access to @clear_seq are protected by @logbuf_lock.
|
||||
Once @logbuf_lock is removed, these variables will need their
|
||||
own synchronization method. Introduce @syslog_lock for this
|
||||
purpose.
|
||||
|
||||
@syslog_lock is a raw_spin_lock for now. This simplifies the
|
||||
transition to removing @logbuf_lock. Once @logbuf_lock and the
|
||||
safe buffers are removed, @syslog_lock can change to spin_lock.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 41 +++++++++++++++++++++++++++++++++++++----
|
||||
1 file changed, 37 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -390,8 +390,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
printk_safe_exit_irqrestore(flags); \
|
||||
} while (0)
|
||||
|
||||
+/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
||||
+static DEFINE_RAW_SPINLOCK(syslog_lock);
|
||||
+
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
+/* All 3 protected by @syslog_lock. */
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
@@ -410,7 +414,7 @@ struct latched_seq {
|
||||
/*
|
||||
* The next printk record to read after the last 'clear' command. There are
|
||||
* two copies (updated with seqcount_latch) so that reads can locklessly
|
||||
- * access a valid value. Writers are synchronized by @logbuf_lock.
|
||||
+ * access a valid value. Writers are synchronized by @syslog_lock.
|
||||
*/
|
||||
static struct latched_seq clear_seq = {
|
||||
.latch = SEQCNT_LATCH_ZERO(clear_seq.latch),
|
||||
@@ -470,7 +474,7 @@ bool printk_percpu_data_ready(void)
|
||||
return __printk_percpu_data_ready;
|
||||
}
|
||||
|
||||
-/* Must be called under logbuf_lock. */
|
||||
+/* Must be called under syslog_lock. */
|
||||
static void latched_seq_write(struct latched_seq *ls, u64 val)
|
||||
{
|
||||
raw_write_seqcount_latch(&ls->latch);
|
||||
@@ -1529,7 +1533,9 @@ static int syslog_print(char __user *buf
|
||||
size_t skip;
|
||||
|
||||
logbuf_lock_irq();
|
||||
+ raw_spin_lock(&syslog_lock);
|
||||
if (!prb_read_valid(prb, syslog_seq, &r)) {
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
logbuf_unlock_irq();
|
||||
break;
|
||||
}
|
||||
@@ -1559,6 +1565,7 @@ static int syslog_print(char __user *buf
|
||||
syslog_partial += n;
|
||||
} else
|
||||
n = 0;
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
logbuf_unlock_irq();
|
||||
|
||||
if (!n)
|
||||
@@ -1625,8 +1632,11 @@ static int syslog_print_all(char __user
|
||||
break;
|
||||
}
|
||||
|
||||
- if (clear)
|
||||
+ if (clear) {
|
||||
+ raw_spin_lock(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, seq);
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
+ }
|
||||
logbuf_unlock_irq();
|
||||
|
||||
kfree(text);
|
||||
@@ -1636,10 +1646,24 @@ static int syslog_print_all(char __user
|
||||
static void syslog_clear(void)
|
||||
{
|
||||
logbuf_lock_irq();
|
||||
+ raw_spin_lock(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, prb_next_seq(prb));
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
logbuf_unlock_irq();
|
||||
}
|
||||
|
||||
+/* Return a consistent copy of @syslog_seq. */
|
||||
+static u64 read_syslog_seq_irq(void)
|
||||
+{
|
||||
+ u64 seq;
|
||||
+
|
||||
+ raw_spin_lock_irq(&syslog_lock);
|
||||
+ seq = syslog_seq;
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
+
|
||||
+ return seq;
|
||||
+}
|
||||
+
|
||||
int do_syslog(int type, char __user *buf, int len, int source)
|
||||
{
|
||||
struct printk_info info;
|
||||
@@ -1663,8 +1687,9 @@ int do_syslog(int type, char __user *buf
|
||||
return 0;
|
||||
if (!access_ok(buf, len))
|
||||
return -EFAULT;
|
||||
+
|
||||
error = wait_event_interruptible(log_wait,
|
||||
- prb_read_valid(prb, syslog_seq, NULL));
|
||||
+ prb_read_valid(prb, read_syslog_seq_irq(), NULL));
|
||||
if (error)
|
||||
return error;
|
||||
error = syslog_print(buf, len);
|
||||
@@ -1713,8 +1738,10 @@ int do_syslog(int type, char __user *buf
|
||||
/* Number of chars in the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_UNREAD:
|
||||
logbuf_lock_irq();
|
||||
+ raw_spin_lock(&syslog_lock);
|
||||
if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
|
||||
/* No unread messages. */
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
logbuf_unlock_irq();
|
||||
return 0;
|
||||
}
|
||||
@@ -1743,6 +1770,7 @@ int do_syslog(int type, char __user *buf
|
||||
}
|
||||
error -= syslog_partial;
|
||||
}
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
logbuf_unlock_irq();
|
||||
break;
|
||||
/* Size of the log buffer */
|
||||
@@ -2992,7 +3020,12 @@ void register_console(struct console *ne
|
||||
*/
|
||||
exclusive_console = newcon;
|
||||
exclusive_console_stop_seq = console_seq;
|
||||
+
|
||||
+ /* Get a consistent copy of @syslog_seq. */
|
||||
+ raw_spin_lock(&syslog_lock);
|
||||
console_seq = syslog_seq;
|
||||
+ raw_spin_unlock(&syslog_lock);
|
||||
+
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
}
|
||||
console_unlock();
|
||||
@@ -0,0 +1,535 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Fri, 18 Dec 2020 11:40:08 +0000
|
||||
Subject: [PATCH 10/28] printk: introduce a kmsg_dump iterator
|
||||
|
||||
Rather than store the iterator information into the registered
|
||||
kmsg_dumper structure, create a separate iterator structure. The
|
||||
kmsg_dumper_iter structure can reside on the stack of the caller,
|
||||
thus allowing lockless use of the kmsg_dump functions.
|
||||
|
||||
This is in preparation for removal of @logbuf_lock.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/powerpc/kernel/nvram_64.c | 12 +++--
|
||||
arch/powerpc/platforms/powernv/opal-kmsg.c | 3 -
|
||||
arch/powerpc/xmon/xmon.c | 6 +-
|
||||
arch/um/kernel/kmsg_dump.c | 5 +-
|
||||
drivers/hv/vmbus_drv.c | 5 +-
|
||||
drivers/mtd/mtdoops.c | 5 +-
|
||||
fs/pstore/platform.c | 5 +-
|
||||
include/linux/kmsg_dump.h | 43 ++++++++++---------
|
||||
kernel/debug/kdb/kdb_main.c | 10 ++--
|
||||
kernel/printk/printk.c | 65 +++++++++++++----------------
|
||||
10 files changed, 84 insertions(+), 75 deletions(-)
|
||||
|
||||
--- a/arch/powerpc/kernel/nvram_64.c
|
||||
+++ b/arch/powerpc/kernel/nvram_64.c
|
||||
@@ -73,7 +73,8 @@ static const char *nvram_os_partitions[]
|
||||
};
|
||||
|
||||
static void oops_to_nvram(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason);
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter);
|
||||
|
||||
static struct kmsg_dumper nvram_kmsg_dumper = {
|
||||
.dump = oops_to_nvram
|
||||
@@ -643,7 +644,8 @@ void __init nvram_init_oops_partition(in
|
||||
* partition. If that's too much, go back and capture uncompressed text.
|
||||
*/
|
||||
static void oops_to_nvram(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
|
||||
static unsigned int oops_count = 0;
|
||||
@@ -681,13 +683,13 @@ static void oops_to_nvram(struct kmsg_du
|
||||
return;
|
||||
|
||||
if (big_oops_buf) {
|
||||
- kmsg_dump_get_buffer(dumper, false,
|
||||
+ kmsg_dump_get_buffer(iter, false,
|
||||
big_oops_buf, big_oops_buf_sz, &text_len);
|
||||
rc = zip_oops(text_len);
|
||||
}
|
||||
if (rc != 0) {
|
||||
- kmsg_dump_rewind(dumper);
|
||||
- kmsg_dump_get_buffer(dumper, false,
|
||||
+ kmsg_dump_rewind(iter);
|
||||
+ kmsg_dump_get_buffer(iter, false,
|
||||
oops_data, oops_data_sz, &text_len);
|
||||
err_type = ERR_TYPE_KERNEL_PANIC;
|
||||
oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
|
||||
--- a/arch/powerpc/platforms/powernv/opal-kmsg.c
|
||||
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
|
||||
@@ -20,7 +20,8 @@
|
||||
* message, it just ensures that OPAL completely flushes the console buffer.
|
||||
*/
|
||||
static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
/*
|
||||
* Outside of a panic context the pollers will continue to run,
|
||||
--- a/arch/powerpc/xmon/xmon.c
|
||||
+++ b/arch/powerpc/xmon/xmon.c
|
||||
@@ -3005,7 +3005,7 @@ print_address(unsigned long addr)
|
||||
static void
|
||||
dump_log_buf(void)
|
||||
{
|
||||
- struct kmsg_dumper dumper = { .active = 1 };
|
||||
+ struct kmsg_dumper_iter iter = { .active = 1 };
|
||||
unsigned char buf[128];
|
||||
size_t len;
|
||||
|
||||
@@ -3017,9 +3017,9 @@ dump_log_buf(void)
|
||||
catch_memory_errors = 1;
|
||||
sync();
|
||||
|
||||
- kmsg_dump_rewind_nolock(&dumper);
|
||||
+ kmsg_dump_rewind_nolock(&iter);
|
||||
xmon_start_pagination();
|
||||
- while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) {
|
||||
+ while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) {
|
||||
buf[len] = '\0';
|
||||
printf("%s", buf);
|
||||
}
|
||||
--- a/arch/um/kernel/kmsg_dump.c
|
||||
+++ b/arch/um/kernel/kmsg_dump.c
|
||||
@@ -7,7 +7,8 @@
|
||||
#include <os.h>
|
||||
|
||||
static void kmsg_dumper_stdout(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
static char line[1024];
|
||||
struct console *con;
|
||||
@@ -30,7 +31,7 @@ static void kmsg_dumper_stdout(struct km
|
||||
return;
|
||||
|
||||
printf("kmsg_dump:\n");
|
||||
- while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) {
|
||||
+ while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
|
||||
line[len] = '\0';
|
||||
printf("%s", line);
|
||||
}
|
||||
--- a/drivers/hv/vmbus_drv.c
|
||||
+++ b/drivers/hv/vmbus_drv.c
|
||||
@@ -1362,7 +1362,8 @@ static void vmbus_isr(void)
|
||||
* buffer and call into Hyper-V to transfer the data.
|
||||
*/
|
||||
static void hv_kmsg_dump(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
size_t bytes_written;
|
||||
phys_addr_t panic_pa;
|
||||
@@ -1377,7 +1378,7 @@ static void hv_kmsg_dump(struct kmsg_dum
|
||||
* Write dump contents to the page. No need to synchronize; panic should
|
||||
* be single-threaded.
|
||||
*/
|
||||
- kmsg_dump_get_buffer(dumper, false, hv_panic_page, HV_HYP_PAGE_SIZE,
|
||||
+ kmsg_dump_get_buffer(iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
|
||||
&bytes_written);
|
||||
if (bytes_written)
|
||||
hyperv_report_panic_msg(panic_pa, bytes_written);
|
||||
--- a/drivers/mtd/mtdoops.c
|
||||
+++ b/drivers/mtd/mtdoops.c
|
||||
@@ -267,7 +267,8 @@ static void find_next_position(struct mt
|
||||
}
|
||||
|
||||
static void mtdoops_do_dump(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
struct mtdoops_context *cxt = container_of(dumper,
|
||||
struct mtdoops_context, dump);
|
||||
@@ -276,7 +277,7 @@ static void mtdoops_do_dump(struct kmsg_
|
||||
if (reason == KMSG_DUMP_OOPS && !dump_oops)
|
||||
return;
|
||||
|
||||
- kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
|
||||
+ kmsg_dump_get_buffer(iter, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
|
||||
record_size - MTDOOPS_HEADER_SIZE, NULL);
|
||||
|
||||
if (reason != KMSG_DUMP_OOPS) {
|
||||
--- a/fs/pstore/platform.c
|
||||
+++ b/fs/pstore/platform.c
|
||||
@@ -383,7 +383,8 @@ void pstore_record_init(struct pstore_re
|
||||
* end of the buffer.
|
||||
*/
|
||||
static void pstore_dump(struct kmsg_dumper *dumper,
|
||||
- enum kmsg_dump_reason reason)
|
||||
+ enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
unsigned long total = 0;
|
||||
const char *why;
|
||||
@@ -435,7 +436,7 @@ static void pstore_dump(struct kmsg_dump
|
||||
dst_size -= header_size;
|
||||
|
||||
/* Write dump contents. */
|
||||
- if (!kmsg_dump_get_buffer(dumper, true, dst + header_size,
|
||||
+ if (!kmsg_dump_get_buffer(iter, true, dst + header_size,
|
||||
dst_size, &dump_size))
|
||||
break;
|
||||
|
||||
--- a/include/linux/kmsg_dump.h
|
||||
+++ b/include/linux/kmsg_dump.h
|
||||
@@ -30,43 +30,48 @@ enum kmsg_dump_reason {
|
||||
};
|
||||
|
||||
/**
|
||||
+ * struct kmsg_dumper_iter - iterator for kernel crash message dumper
|
||||
+ * @active: Flag that specifies if this is currently dumping
|
||||
+ * @cur_seq: Points to the oldest message to dump (private)
|
||||
+ * @next_seq: Points after the newest message to dump (private)
|
||||
+ */
|
||||
+struct kmsg_dumper_iter {
|
||||
+ bool active;
|
||||
+ u64 cur_seq;
|
||||
+ u64 next_seq;
|
||||
+};
|
||||
+
|
||||
+/**
|
||||
* struct kmsg_dumper - kernel crash message dumper structure
|
||||
* @list: Entry in the dumper list (private)
|
||||
* @dump: Call into dumping code which will retrieve the data with
|
||||
* through the record iterator
|
||||
* @max_reason: filter for highest reason number that should be dumped
|
||||
* @registered: Flag that specifies if this is already registered
|
||||
- * @active: Flag that specifies if this is currently dumping
|
||||
- * @cur_seq: Points to the oldest message to dump (private)
|
||||
- * @next_seq: Points after the newest message to dump (private)
|
||||
*/
|
||||
struct kmsg_dumper {
|
||||
struct list_head list;
|
||||
- void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
|
||||
+ void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
|
||||
+ struct kmsg_dumper_iter *iter);
|
||||
enum kmsg_dump_reason max_reason;
|
||||
- bool active;
|
||||
bool registered;
|
||||
-
|
||||
- /* private state of the kmsg iterator */
|
||||
- u64 cur_seq;
|
||||
- u64 next_seq;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
void kmsg_dump(enum kmsg_dump_reason reason);
|
||||
|
||||
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *line, size_t size, size_t *len);
|
||||
|
||||
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *line, size_t size, size_t *len);
|
||||
|
||||
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *buf, size_t size, size_t *len_out);
|
||||
|
||||
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper);
|
||||
+void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter);
|
||||
|
||||
-void kmsg_dump_rewind(struct kmsg_dumper *dumper);
|
||||
+void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter);
|
||||
|
||||
int kmsg_dump_register(struct kmsg_dumper *dumper);
|
||||
|
||||
@@ -78,30 +83,30 @@ static inline void kmsg_dump(enum kmsg_d
|
||||
{
|
||||
}
|
||||
|
||||
-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper,
|
||||
+static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter,
|
||||
bool syslog, const char *line,
|
||||
size_t size, size_t *len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
|
||||
+static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
const char *line, size_t size, size_t *len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
+static inline bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *buf, size_t size, size_t *len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
|
||||
+static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
}
|
||||
|
||||
-static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
|
||||
+static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
}
|
||||
|
||||
--- a/kernel/debug/kdb/kdb_main.c
|
||||
+++ b/kernel/debug/kdb/kdb_main.c
|
||||
@@ -2101,7 +2101,7 @@ static int kdb_dmesg(int argc, const cha
|
||||
int adjust = 0;
|
||||
int n = 0;
|
||||
int skip = 0;
|
||||
- struct kmsg_dumper dumper = { .active = 1 };
|
||||
+ struct kmsg_dumper_iter iter = { .active = 1 };
|
||||
size_t len;
|
||||
char buf[201];
|
||||
|
||||
@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha
|
||||
kdb_set(2, setargs);
|
||||
}
|
||||
|
||||
- kmsg_dump_rewind_nolock(&dumper);
|
||||
- while (kmsg_dump_get_line_nolock(&dumper, 1, NULL, 0, NULL))
|
||||
+ kmsg_dump_rewind_nolock(&iter);
|
||||
+ while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL))
|
||||
n++;
|
||||
|
||||
if (lines < 0) {
|
||||
@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha
|
||||
if (skip >= n || skip < 0)
|
||||
return 0;
|
||||
|
||||
- kmsg_dump_rewind_nolock(&dumper);
|
||||
- while (kmsg_dump_get_line_nolock(&dumper, 1, buf, sizeof(buf), &len)) {
|
||||
+ kmsg_dump_rewind_nolock(&iter);
|
||||
+ while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) {
|
||||
if (skip) {
|
||||
skip--;
|
||||
continue;
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3389,6 +3389,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_reason_str);
|
||||
*/
|
||||
void kmsg_dump(enum kmsg_dump_reason reason)
|
||||
{
|
||||
+ struct kmsg_dumper_iter iter;
|
||||
struct kmsg_dumper *dumper;
|
||||
unsigned long flags;
|
||||
|
||||
@@ -3408,25 +3409,21 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
continue;
|
||||
|
||||
/* initialize iterator with data about the stored records */
|
||||
- dumper->active = true;
|
||||
-
|
||||
+ iter.active = true;
|
||||
logbuf_lock_irqsave(flags);
|
||||
- dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
- dumper->next_seq = prb_next_seq(prb);
|
||||
+ iter.cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
+ iter.next_seq = prb_next_seq(prb);
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
|
||||
/* invoke dumper which will iterate over records */
|
||||
- dumper->dump(dumper, reason);
|
||||
-
|
||||
- /* reset iterator */
|
||||
- dumper->active = false;
|
||||
+ dumper->dump(dumper, reason, &iter);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
|
||||
- * @dumper: registered kmsg dumper
|
||||
+ * @iter: kmsg dumper iterator
|
||||
* @syslog: include the "<4>" prefixes
|
||||
* @line: buffer to copy the line to
|
||||
* @size: maximum size of the buffer
|
||||
@@ -3443,7 +3440,7 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
*
|
||||
* The function is similar to kmsg_dump_get_line(), but grabs no locks.
|
||||
*/
|
||||
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
struct printk_info info;
|
||||
@@ -3454,16 +3451,16 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
|
||||
prb_rec_init_rd(&r, &info, line, size);
|
||||
|
||||
- if (!dumper->active)
|
||||
+ if (!iter->active)
|
||||
goto out;
|
||||
|
||||
/* Read text or count text lines? */
|
||||
if (line) {
|
||||
- if (!prb_read_valid(prb, dumper->cur_seq, &r))
|
||||
+ if (!prb_read_valid(prb, iter->cur_seq, &r))
|
||||
goto out;
|
||||
l = record_print_text(&r, syslog, printk_time);
|
||||
} else {
|
||||
- if (!prb_read_valid_info(prb, dumper->cur_seq,
|
||||
+ if (!prb_read_valid_info(prb, iter->cur_seq,
|
||||
&info, &line_count)) {
|
||||
goto out;
|
||||
}
|
||||
@@ -3472,7 +3469,7 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
|
||||
}
|
||||
|
||||
- dumper->cur_seq = r.info->seq + 1;
|
||||
+ iter->cur_seq = r.info->seq + 1;
|
||||
ret = true;
|
||||
out:
|
||||
if (len)
|
||||
@@ -3482,7 +3479,7 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
|
||||
/**
|
||||
* kmsg_dump_get_line - retrieve one kmsg log line
|
||||
- * @dumper: registered kmsg dumper
|
||||
+ * @iter: kmsg dumper iterator
|
||||
* @syslog: include the "<4>" prefixes
|
||||
* @line: buffer to copy the line to
|
||||
* @size: maximum size of the buffer
|
||||
@@ -3497,14 +3494,14 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
* A return value of FALSE indicates that there are no more records to
|
||||
* read.
|
||||
*/
|
||||
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
logbuf_lock_irqsave(flags);
|
||||
- ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
|
||||
+ ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
|
||||
return ret;
|
||||
@@ -3513,7 +3510,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
|
||||
/**
|
||||
* kmsg_dump_get_buffer - copy kmsg log lines
|
||||
- * @dumper: registered kmsg dumper
|
||||
+ * @iter: kmsg dumper iterator
|
||||
* @syslog: include the "<4>" prefixes
|
||||
* @buf: buffer to copy the line to
|
||||
* @size: maximum size of the buffer
|
||||
@@ -3530,7 +3527,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
* A return value of FALSE indicates that there are no more records to
|
||||
* read.
|
||||
*/
|
||||
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
+bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *buf, size_t size, size_t *len_out)
|
||||
{
|
||||
struct printk_info info;
|
||||
@@ -3542,19 +3539,19 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
bool ret = false;
|
||||
bool time = printk_time;
|
||||
|
||||
- if (!dumper->active || !buf || !size)
|
||||
+ if (!iter->active || !buf || !size)
|
||||
goto out;
|
||||
|
||||
logbuf_lock_irqsave(flags);
|
||||
- if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) {
|
||||
- if (info.seq != dumper->cur_seq) {
|
||||
+ if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
|
||||
+ if (info.seq != iter->cur_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
- dumper->cur_seq = info.seq;
|
||||
+ iter->cur_seq = info.seq;
|
||||
}
|
||||
}
|
||||
|
||||
/* last entry */
|
||||
- if (dumper->cur_seq >= dumper->next_seq) {
|
||||
+ if (iter->cur_seq >= iter->next_seq) {
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
goto out;
|
||||
}
|
||||
@@ -3565,7 +3562,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
* because this function (by way of record_print_text()) will
|
||||
* not write more than size-1 bytes of text into @buf.
|
||||
*/
|
||||
- seq = find_first_fitting_seq(dumper->cur_seq, dumper->next_seq,
|
||||
+ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq,
|
||||
size - 1, syslog, time);
|
||||
|
||||
/*
|
||||
@@ -3578,7 +3575,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
|
||||
len = 0;
|
||||
prb_for_each_record(seq, prb, seq, &r) {
|
||||
- if (r.info->seq >= dumper->next_seq)
|
||||
+ if (r.info->seq >= iter->next_seq)
|
||||
break;
|
||||
|
||||
len += record_print_text(&r, syslog, time);
|
||||
@@ -3587,7 +3584,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
prb_rec_init_rd(&r, &info, buf + len, size - len);
|
||||
}
|
||||
|
||||
- dumper->next_seq = next_seq;
|
||||
+ iter->next_seq = next_seq;
|
||||
ret = true;
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
out:
|
||||
@@ -3599,7 +3596,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
|
||||
/**
|
||||
* kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
|
||||
- * @dumper: registered kmsg dumper
|
||||
+ * @iter: kmsg dumper iterator
|
||||
*
|
||||
* Reset the dumper's iterator so that kmsg_dump_get_line() and
|
||||
* kmsg_dump_get_buffer() can be called again and used multiple
|
||||
@@ -3607,26 +3604,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
*
|
||||
* The function is similar to kmsg_dump_rewind(), but grabs no locks.
|
||||
*/
|
||||
-void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
|
||||
+void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
- dumper->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
- dumper->next_seq = prb_next_seq(prb);
|
||||
+ iter->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
+ iter->next_seq = prb_next_seq(prb);
|
||||
}
|
||||
|
||||
/**
|
||||
* kmsg_dump_rewind - reset the iterator
|
||||
- * @dumper: registered kmsg dumper
|
||||
+ * @iter: kmsg dumper iterator
|
||||
*
|
||||
* Reset the dumper's iterator so that kmsg_dump_get_line() and
|
||||
* kmsg_dump_get_buffer() can be called again and used multiple
|
||||
* times within the same dumper.dump() callback.
|
||||
*/
|
||||
-void kmsg_dump_rewind(struct kmsg_dumper *dumper)
|
||||
+void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
logbuf_lock_irqsave(flags);
|
||||
- kmsg_dump_rewind_nolock(dumper);
|
||||
+ kmsg_dump_rewind_nolock(iter);
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
@@ -0,0 +1,54 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 21 Dec 2020 11:10:03 +0106
|
||||
Subject: [PATCH 11/28] um: synchronize kmsg_dumper
|
||||
|
||||
The kmsg_dumper can be called from any context and CPU, possibly
|
||||
from multiple CPUs simultaneously. Since a static buffer is used
|
||||
to retrieve the kernel logs, this buffer must be protected against
|
||||
simultaneous dumping.
|
||||
|
||||
Cc: Richard Weinberger <richard@nod.at>
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Reviewed-by: Petr Mladek <pmladek@suse.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/um/kernel/kmsg_dump.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/arch/um/kernel/kmsg_dump.c
|
||||
+++ b/arch/um/kernel/kmsg_dump.c
|
||||
@@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/kmsg_dump.h>
|
||||
+#include <linux/spinlock.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/string.h>
|
||||
#include <shared/init.h>
|
||||
@@ -10,8 +11,10 @@ static void kmsg_dumper_stdout(struct km
|
||||
enum kmsg_dump_reason reason,
|
||||
struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
+ static DEFINE_SPINLOCK(lock);
|
||||
static char line[1024];
|
||||
struct console *con;
|
||||
+ unsigned long flags;
|
||||
size_t len = 0;
|
||||
|
||||
/* only dump kmsg when no console is available */
|
||||
@@ -30,11 +33,16 @@ static void kmsg_dumper_stdout(struct km
|
||||
if (con)
|
||||
return;
|
||||
|
||||
+ if (!spin_trylock_irqsave(&lock, flags))
|
||||
+ return;
|
||||
+
|
||||
printf("kmsg_dump:\n");
|
||||
while (kmsg_dump_get_line(iter, true, line, sizeof(line), &len)) {
|
||||
line[len] = '\0';
|
||||
printf("%s", line);
|
||||
}
|
||||
+
|
||||
+ spin_unlock_irqrestore(&lock, flags);
|
||||
}
|
||||
|
||||
static struct kmsg_dumper kmsg_dumper = {
|
||||
@@ -0,0 +1,475 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 26 Jan 2021 17:43:19 +0106
|
||||
Subject: [PATCH 12/28] printk: remove logbuf_lock
|
||||
|
||||
Since the ringbuffer is lockless, there is no need for it to be
|
||||
protected by @logbuf_lock. Remove @logbuf_lock.
|
||||
|
||||
This means that printk_nmi_direct and printk_safe_flush_on_panic()
|
||||
no longer need to acquire any lock to run.
|
||||
|
||||
@console_seq, @exclusive_console_stop_seq, @console_dropped are
|
||||
protected by @console_lock.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/internal.h | 4 -
|
||||
kernel/printk/printk.c | 118 ++++++++++++++------------------------------
|
||||
kernel/printk/printk_safe.c | 29 ++--------
|
||||
3 files changed, 48 insertions(+), 103 deletions(-)
|
||||
|
||||
--- a/kernel/printk/internal.h
|
||||
+++ b/kernel/printk/internal.h
|
||||
@@ -12,8 +12,6 @@
|
||||
|
||||
#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
|
||||
|
||||
-extern raw_spinlock_t logbuf_lock;
|
||||
-
|
||||
__printf(4, 0)
|
||||
int vprintk_store(int facility, int level,
|
||||
const struct dev_printk_info *dev_info,
|
||||
@@ -59,7 +57,7 @@ void defer_console_output(void);
|
||||
__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
|
||||
|
||||
/*
|
||||
- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem
|
||||
+ * In !PRINTK builds we still export console_sem
|
||||
* semaphore and some of console functions (console_unlock()/etc.), so
|
||||
* printk-safe must preserve the existing local IRQ guarantees.
|
||||
*/
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -355,41 +355,6 @@ enum log_flags {
|
||||
LOG_CONT = 8, /* text is a fragment of a continuation line */
|
||||
};
|
||||
|
||||
-/*
|
||||
- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
|
||||
- * within the scheduler's rq lock. It must be released before calling
|
||||
- * console_unlock() or anything else that might wake up a process.
|
||||
- */
|
||||
-DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
-
|
||||
-/*
|
||||
- * Helper macros to lock/unlock logbuf_lock and switch between
|
||||
- * printk-safe/unsafe modes.
|
||||
- */
|
||||
-#define logbuf_lock_irq() \
|
||||
- do { \
|
||||
- printk_safe_enter_irq(); \
|
||||
- raw_spin_lock(&logbuf_lock); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_unlock_irq() \
|
||||
- do { \
|
||||
- raw_spin_unlock(&logbuf_lock); \
|
||||
- printk_safe_exit_irq(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_lock_irqsave(flags) \
|
||||
- do { \
|
||||
- printk_safe_enter_irqsave(flags); \
|
||||
- raw_spin_lock(&logbuf_lock); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_unlock_irqrestore(flags) \
|
||||
- do { \
|
||||
- raw_spin_unlock(&logbuf_lock); \
|
||||
- printk_safe_exit_irqrestore(flags); \
|
||||
- } while (0)
|
||||
-
|
||||
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
||||
static DEFINE_RAW_SPINLOCK(syslog_lock);
|
||||
|
||||
@@ -401,6 +366,7 @@ static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
+/* All 3 protected by @console_sem. */
|
||||
/* the next printk record to write to the console */
|
||||
static u64 console_seq;
|
||||
static u64 exclusive_console_stop_seq;
|
||||
@@ -766,27 +732,27 @@ static ssize_t devkmsg_read(struct file
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
|
||||
if (file->f_flags & O_NONBLOCK) {
|
||||
ret = -EAGAIN;
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
goto out;
|
||||
}
|
||||
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
ret = wait_event_interruptible(log_wait,
|
||||
prb_read_valid(prb, atomic64_read(&user->seq), r));
|
||||
if (ret)
|
||||
goto out;
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
}
|
||||
|
||||
if (r->info->seq != atomic64_read(&user->seq)) {
|
||||
/* our last seen message is gone, return error and reset */
|
||||
atomic64_set(&user->seq, r->info->seq);
|
||||
ret = -EPIPE;
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -796,7 +762,7 @@ static ssize_t devkmsg_read(struct file
|
||||
&r->info->dev_info);
|
||||
|
||||
atomic64_set(&user->seq, r->info->seq + 1);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
|
||||
if (len > count) {
|
||||
ret = -EINVAL;
|
||||
@@ -831,7 +797,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
if (offset)
|
||||
return -ESPIPE;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
/* the first record */
|
||||
@@ -852,7 +818,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -867,15 +833,15 @@ static __poll_t devkmsg_poll(struct file
|
||||
|
||||
poll_wait(file, &log_wait, wait);
|
||||
|
||||
- logbuf_lock_irq();
|
||||
- if (prb_read_valid_info(prb, user->seq, &info, NULL)) {
|
||||
+ printk_safe_enter_irq();
|
||||
+ if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
|
||||
/* return error when data has vanished underneath us */
|
||||
if (info.seq != atomic64_read(&user->seq))
|
||||
ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
|
||||
else
|
||||
ret = EPOLLIN|EPOLLRDNORM;
|
||||
}
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -908,9 +874,9 @@ static int devkmsg_open(struct inode *in
|
||||
prb_rec_init_rd(&user->record, &user->info,
|
||||
&user->text_buf[0], sizeof(user->text_buf));
|
||||
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
atomic64_set(&user->seq, prb_first_valid_seq(prb));
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
|
||||
file->private_data = user;
|
||||
return 0;
|
||||
@@ -1532,11 +1498,11 @@ static int syslog_print(char __user *buf
|
||||
size_t n;
|
||||
size_t skip;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
raw_spin_lock(&syslog_lock);
|
||||
if (!prb_read_valid(prb, syslog_seq, &r)) {
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
break;
|
||||
}
|
||||
if (r.info->seq != syslog_seq) {
|
||||
@@ -1566,7 +1532,7 @@ static int syslog_print(char __user *buf
|
||||
} else
|
||||
n = 0;
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
|
||||
if (!n)
|
||||
break;
|
||||
@@ -1600,7 +1566,7 @@ static int syslog_print_all(char __user
|
||||
return -ENOMEM;
|
||||
|
||||
time = printk_time;
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
/*
|
||||
* Find first record that fits, including all following records,
|
||||
* into the user-provided buffer for this dump.
|
||||
@@ -1621,12 +1587,12 @@ static int syslog_print_all(char __user
|
||||
break;
|
||||
}
|
||||
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
if (copy_to_user(buf + len, text, textlen))
|
||||
len = -EFAULT;
|
||||
else
|
||||
len += textlen;
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
|
||||
if (len < 0)
|
||||
break;
|
||||
@@ -1637,7 +1603,7 @@ static int syslog_print_all(char __user
|
||||
latched_seq_write(&clear_seq, seq);
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
}
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
|
||||
kfree(text);
|
||||
return len;
|
||||
@@ -1645,11 +1611,11 @@ static int syslog_print_all(char __user
|
||||
|
||||
static void syslog_clear(void)
|
||||
{
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
raw_spin_lock(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, prb_next_seq(prb));
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
}
|
||||
|
||||
/* Return a consistent copy of @syslog_seq. */
|
||||
@@ -1737,12 +1703,12 @@ int do_syslog(int type, char __user *buf
|
||||
break;
|
||||
/* Number of chars in the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_UNREAD:
|
||||
- logbuf_lock_irq();
|
||||
+ printk_safe_enter_irq();
|
||||
raw_spin_lock(&syslog_lock);
|
||||
if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
|
||||
/* No unread messages. */
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
return 0;
|
||||
}
|
||||
if (info.seq != syslog_seq) {
|
||||
@@ -1771,7 +1737,7 @@ int do_syslog(int type, char __user *buf
|
||||
error -= syslog_partial;
|
||||
}
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
- logbuf_unlock_irq();
|
||||
+ printk_safe_exit_irq();
|
||||
break;
|
||||
/* Size of the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_BUFFER:
|
||||
@@ -2627,7 +2593,6 @@ void console_unlock(void)
|
||||
size_t len;
|
||||
|
||||
printk_safe_enter_irqsave(flags);
|
||||
- raw_spin_lock(&logbuf_lock);
|
||||
skip:
|
||||
if (!prb_read_valid(prb, console_seq, &r))
|
||||
break;
|
||||
@@ -2671,7 +2636,6 @@ void console_unlock(void)
|
||||
console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
printk_time);
|
||||
console_seq++;
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
|
||||
/*
|
||||
* While actively printing out messages, if another printk()
|
||||
@@ -2698,8 +2662,6 @@ void console_unlock(void)
|
||||
|
||||
console_locked = 0;
|
||||
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
-
|
||||
up_console_sem();
|
||||
|
||||
/*
|
||||
@@ -2708,9 +2670,7 @@ void console_unlock(void)
|
||||
* there's a new owner and the console_unlock() from them will do the
|
||||
* flush, no worries.
|
||||
*/
|
||||
- raw_spin_lock(&logbuf_lock);
|
||||
retry = prb_read_valid(prb, console_seq, NULL);
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
printk_safe_exit_irqrestore(flags);
|
||||
|
||||
if (retry && console_trylock())
|
||||
@@ -2777,9 +2737,9 @@ void console_flush_on_panic(enum con_flu
|
||||
if (mode == CONSOLE_REPLAY_ALL) {
|
||||
unsigned long flags;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
console_seq = prb_first_valid_seq(prb);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
console_unlock();
|
||||
}
|
||||
@@ -3008,7 +2968,7 @@ void register_console(struct console *ne
|
||||
* console_unlock(); will print out the buffered messages
|
||||
* for us.
|
||||
*/
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
/*
|
||||
* We're about to replay the log buffer. Only do this to the
|
||||
* just-registered console to avoid excessive message spam to
|
||||
@@ -3026,7 +2986,7 @@ void register_console(struct console *ne
|
||||
console_seq = syslog_seq;
|
||||
raw_spin_unlock(&syslog_lock);
|
||||
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
@@ -3410,10 +3370,10 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
|
||||
/* initialize iterator with data about the stored records */
|
||||
iter.active = true;
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
iter.cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
iter.next_seq = prb_next_seq(prb);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
|
||||
/* invoke dumper which will iterate over records */
|
||||
dumper->dump(dumper, reason, &iter);
|
||||
@@ -3500,9 +3460,9 @@ bool kmsg_dump_get_line(struct kmsg_dump
|
||||
unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -3542,7 +3502,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
if (!iter->active || !buf || !size)
|
||||
goto out;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
|
||||
if (info.seq != iter->cur_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
@@ -3552,7 +3512,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
|
||||
/* last entry */
|
||||
if (iter->cur_seq >= iter->next_seq) {
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -3586,7 +3546,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
|
||||
iter->next_seq = next_seq;
|
||||
ret = true;
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
out:
|
||||
if (len_out)
|
||||
*len_out = len;
|
||||
@@ -3622,9 +3582,9 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
kmsg_dump_rewind_nolock(iter);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
|
||||
--- a/kernel/printk/printk_safe.c
|
||||
+++ b/kernel/printk/printk_safe.c
|
||||
@@ -16,7 +16,7 @@
|
||||
#include "internal.h"
|
||||
|
||||
/*
|
||||
- * printk() could not take logbuf_lock in NMI context. Instead,
|
||||
+ * In NMI and safe mode, printk() avoids taking locks. Instead,
|
||||
* it uses an alternative implementation that temporary stores
|
||||
* the strings into a per-CPU buffer. The content of the buffer
|
||||
* is later flushed into the main ring buffer via IRQ work.
|
||||
@@ -266,18 +266,6 @@ void printk_safe_flush(void)
|
||||
*/
|
||||
void printk_safe_flush_on_panic(void)
|
||||
{
|
||||
- /*
|
||||
- * Make sure that we could access the main ring buffer.
|
||||
- * Do not risk a double release when more CPUs are up.
|
||||
- */
|
||||
- if (raw_spin_is_locked(&logbuf_lock)) {
|
||||
- if (num_online_cpus() > 1)
|
||||
- return;
|
||||
-
|
||||
- debug_locks_off();
|
||||
- raw_spin_lock_init(&logbuf_lock);
|
||||
- }
|
||||
-
|
||||
if (raw_spin_is_locked(&safe_read_lock)) {
|
||||
if (num_online_cpus() > 1)
|
||||
return;
|
||||
@@ -319,9 +307,7 @@ void noinstr printk_nmi_exit(void)
|
||||
* reordering.
|
||||
*
|
||||
* It has effect only when called in NMI context. Then printk()
|
||||
- * will try to store the messages into the main logbuf directly
|
||||
- * and use the per-CPU buffers only as a fallback when the lock
|
||||
- * is not available.
|
||||
+ * will store the messages into the main logbuf directly.
|
||||
*/
|
||||
void printk_nmi_direct_enter(void)
|
||||
{
|
||||
@@ -376,20 +362,21 @@ void __printk_safe_exit(void)
|
||||
#endif
|
||||
|
||||
/*
|
||||
- * Try to use the main logbuf even in NMI. But avoid calling console
|
||||
+ * Use the main logbuf even in NMI. But avoid calling console
|
||||
* drivers that might have their own locks.
|
||||
*/
|
||||
- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
|
||||
- raw_spin_trylock(&logbuf_lock)) {
|
||||
+ if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
|
||||
+ unsigned long flags;
|
||||
int len;
|
||||
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
defer_console_output();
|
||||
return len;
|
||||
}
|
||||
|
||||
- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
|
||||
+ /* Use extra buffer in NMI. */
|
||||
if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
|
||||
return vprintk_nmi(fmt, args);
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 21 Dec 2020 10:27:58 +0106
|
||||
Subject: [PATCH 13/28] printk: kmsg_dump: remove _nolock() variants
|
||||
|
||||
kmsg_dump_rewind() and kmsg_dump_get_line() are lockless, so there is
|
||||
no need for _nolock() variants. Remove these functions and switch all
|
||||
callers of the _nolock() variants.
|
||||
|
||||
The functions without _nolock() were chosen because they are already
|
||||
exported to kernel modules.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
---
|
||||
arch/powerpc/xmon/xmon.c | 4 +-
|
||||
include/linux/kmsg_dump.h | 18 -------------
|
||||
kernel/debug/kdb/kdb_main.c | 8 ++---
|
||||
kernel/printk/printk.c | 60 +++++---------------------------------------
|
||||
4 files changed, 15 insertions(+), 75 deletions(-)
|
||||
|
||||
--- a/arch/powerpc/xmon/xmon.c
|
||||
+++ b/arch/powerpc/xmon/xmon.c
|
||||
@@ -3017,9 +3017,9 @@ dump_log_buf(void)
|
||||
catch_memory_errors = 1;
|
||||
sync();
|
||||
|
||||
- kmsg_dump_rewind_nolock(&iter);
|
||||
+ kmsg_dump_rewind(&iter);
|
||||
xmon_start_pagination();
|
||||
- while (kmsg_dump_get_line_nolock(&iter, false, buf, sizeof(buf), &len)) {
|
||||
+ while (kmsg_dump_get_line(&iter, false, buf, sizeof(buf), &len)) {
|
||||
buf[len] = '\0';
|
||||
printf("%s", buf);
|
||||
}
|
||||
--- a/include/linux/kmsg_dump.h
|
||||
+++ b/include/linux/kmsg_dump.h
|
||||
@@ -60,18 +60,13 @@ struct kmsg_dumper {
|
||||
#ifdef CONFIG_PRINTK
|
||||
void kmsg_dump(enum kmsg_dump_reason reason);
|
||||
|
||||
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
- char *line, size_t size, size_t *len);
|
||||
-
|
||||
bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *line, size_t size, size_t *len);
|
||||
|
||||
bool kmsg_dump_get_buffer(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
char *buf, size_t size, size_t *len_out);
|
||||
|
||||
-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter);
|
||||
-
|
||||
-void kmsg_dump_rewind(struct kmsg_dumper_iter *dumper_iter);
|
||||
+void kmsg_dump_rewind(struct kmsg_dumper_iter *iter);
|
||||
|
||||
int kmsg_dump_register(struct kmsg_dumper *dumper);
|
||||
|
||||
@@ -83,13 +78,6 @@ static inline void kmsg_dump(enum kmsg_d
|
||||
{
|
||||
}
|
||||
|
||||
-static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter,
|
||||
- bool syslog, const char *line,
|
||||
- size_t size, size_t *len)
|
||||
-{
|
||||
- return false;
|
||||
-}
|
||||
-
|
||||
static inline bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
const char *line, size_t size, size_t *len)
|
||||
{
|
||||
@@ -102,10 +90,6 @@ static inline bool kmsg_dump_get_buffer(
|
||||
return false;
|
||||
}
|
||||
|
||||
-static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
static inline void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
}
|
||||
--- a/kernel/debug/kdb/kdb_main.c
|
||||
+++ b/kernel/debug/kdb/kdb_main.c
|
||||
@@ -2126,8 +2126,8 @@ static int kdb_dmesg(int argc, const cha
|
||||
kdb_set(2, setargs);
|
||||
}
|
||||
|
||||
- kmsg_dump_rewind_nolock(&iter);
|
||||
- while (kmsg_dump_get_line_nolock(&iter, 1, NULL, 0, NULL))
|
||||
+ kmsg_dump_rewind(&iter);
|
||||
+ while (kmsg_dump_get_line(&iter, 1, NULL, 0, NULL))
|
||||
n++;
|
||||
|
||||
if (lines < 0) {
|
||||
@@ -2159,8 +2159,8 @@ static int kdb_dmesg(int argc, const cha
|
||||
if (skip >= n || skip < 0)
|
||||
return 0;
|
||||
|
||||
- kmsg_dump_rewind_nolock(&iter);
|
||||
- while (kmsg_dump_get_line_nolock(&iter, 1, buf, sizeof(buf), &len)) {
|
||||
+ kmsg_dump_rewind(&iter);
|
||||
+ while (kmsg_dump_get_line(&iter, 1, buf, sizeof(buf), &len)) {
|
||||
if (skip) {
|
||||
skip--;
|
||||
continue;
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3382,7 +3382,7 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
}
|
||||
|
||||
/**
|
||||
- * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version)
|
||||
+ * kmsg_dump_get_line - retrieve one kmsg log line
|
||||
* @iter: kmsg dumper iterator
|
||||
* @syslog: include the "<4>" prefixes
|
||||
* @line: buffer to copy the line to
|
||||
@@ -3397,18 +3397,18 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
*
|
||||
* A return value of FALSE indicates that there are no more records to
|
||||
* read.
|
||||
- *
|
||||
- * The function is similar to kmsg_dump_get_line(), but grabs no locks.
|
||||
*/
|
||||
-bool kmsg_dump_get_line_nolock(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
- char *line, size_t size, size_t *len)
|
||||
+bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
+ char *line, size_t size, size_t *len)
|
||||
{
|
||||
struct printk_info info;
|
||||
unsigned int line_count;
|
||||
struct printk_record r;
|
||||
+ unsigned long flags;
|
||||
size_t l = 0;
|
||||
bool ret = false;
|
||||
|
||||
+ printk_safe_enter_irqsave(flags);
|
||||
prb_rec_init_rd(&r, &info, line, size);
|
||||
|
||||
if (!iter->active)
|
||||
@@ -3432,40 +3432,11 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
iter->cur_seq = r.info->seq + 1;
|
||||
ret = true;
|
||||
out:
|
||||
+ printk_safe_exit_irqrestore(flags);
|
||||
if (len)
|
||||
*len = l;
|
||||
return ret;
|
||||
}
|
||||
-
|
||||
-/**
|
||||
- * kmsg_dump_get_line - retrieve one kmsg log line
|
||||
- * @iter: kmsg dumper iterator
|
||||
- * @syslog: include the "<4>" prefixes
|
||||
- * @line: buffer to copy the line to
|
||||
- * @size: maximum size of the buffer
|
||||
- * @len: length of line placed into buffer
|
||||
- *
|
||||
- * Start at the beginning of the kmsg buffer, with the oldest kmsg
|
||||
- * record, and copy one record into the provided buffer.
|
||||
- *
|
||||
- * Consecutive calls will return the next available record moving
|
||||
- * towards the end of the buffer with the youngest messages.
|
||||
- *
|
||||
- * A return value of FALSE indicates that there are no more records to
|
||||
- * read.
|
||||
- */
|
||||
-bool kmsg_dump_get_line(struct kmsg_dumper_iter *iter, bool syslog,
|
||||
- char *line, size_t size, size_t *len)
|
||||
-{
|
||||
- unsigned long flags;
|
||||
- bool ret;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- ret = kmsg_dump_get_line_nolock(iter, syslog, line, size, len);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
- return ret;
|
||||
-}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
|
||||
/**
|
||||
@@ -3555,22 +3526,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
|
||||
/**
|
||||
- * kmsg_dump_rewind_nolock - reset the iterator (unlocked version)
|
||||
- * @iter: kmsg dumper iterator
|
||||
- *
|
||||
- * Reset the dumper's iterator so that kmsg_dump_get_line() and
|
||||
- * kmsg_dump_get_buffer() can be called again and used multiple
|
||||
- * times within the same dumper.dump() callback.
|
||||
- *
|
||||
- * The function is similar to kmsg_dump_rewind(), but grabs no locks.
|
||||
- */
|
||||
-void kmsg_dump_rewind_nolock(struct kmsg_dumper_iter *iter)
|
||||
-{
|
||||
- iter->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
- iter->next_seq = prb_next_seq(prb);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
* kmsg_dump_rewind - reset the iterator
|
||||
* @iter: kmsg dumper iterator
|
||||
*
|
||||
@@ -3583,7 +3538,8 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
unsigned long flags;
|
||||
|
||||
printk_safe_enter_irqsave(flags);
|
||||
- kmsg_dump_rewind_nolock(iter);
|
||||
+ iter->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
+ iter->next_seq = prb_next_seq(prb);
|
||||
printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
@@ -0,0 +1,35 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Wed, 17 Feb 2021 18:23:16 +0100
|
||||
Subject: [PATCH 14/28] printk: kmsg_dump: use kmsg_dump_rewind
|
||||
|
||||
kmsg_dump() open-codes kmsg_dump_rewind(). Call
|
||||
kmsg_dump_rewind() instead.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 6 +-----
|
||||
1 file changed, 1 insertion(+), 5 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3351,7 +3351,6 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
{
|
||||
struct kmsg_dumper_iter iter;
|
||||
struct kmsg_dumper *dumper;
|
||||
- unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(dumper, &dump_list, list) {
|
||||
@@ -3370,10 +3369,7 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
|
||||
/* initialize iterator with data about the stored records */
|
||||
iter.active = true;
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- iter.cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
- iter.next_seq = prb_next_seq(prb);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
+ kmsg_dump_rewind(&iter);
|
||||
|
||||
/* invoke dumper which will iterate over records */
|
||||
dumper->dump(dumper, reason, &iter);
|
||||
@@ -0,0 +1,41 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Wed, 17 Feb 2021 18:28:05 +0100
|
||||
Subject: [PATCH 15/28] printk: console: remove unnecessary safe buffer usage
|
||||
|
||||
Upon registering a console, safe buffers are activated when setting
|
||||
up the sequence number to replay the log. However, these are already
|
||||
protected by @console_sem and @syslog_lock. Remove the unnecessary
|
||||
safe buffer usage.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 10 +++-------
|
||||
1 file changed, 3 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -2967,9 +2967,7 @@ void register_console(struct console *ne
|
||||
/*
|
||||
* console_unlock(); will print out the buffered messages
|
||||
* for us.
|
||||
- */
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- /*
|
||||
+ *
|
||||
* We're about to replay the log buffer. Only do this to the
|
||||
* just-registered console to avoid excessive message spam to
|
||||
* the already-registered consoles.
|
||||
@@ -2982,11 +2980,9 @@ void register_console(struct console *ne
|
||||
exclusive_console_stop_seq = console_seq;
|
||||
|
||||
/* Get a consistent copy of @syslog_seq. */
|
||||
- raw_spin_lock(&syslog_lock);
|
||||
+ raw_spin_lock_irqsave(&syslog_lock, flags);
|
||||
console_seq = syslog_seq;
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
-
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
+ raw_spin_unlock_irqrestore(&syslog_lock, flags);
|
||||
}
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
@@ -0,0 +1,136 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Fri, 11 Dec 2020 00:55:25 +0106
|
||||
Subject: [PATCH 16/28] printk: track/limit recursion
|
||||
|
||||
Limit printk() recursion to 1 level. This is enough to print a
|
||||
stacktrace for the printk call, should a WARN or BUG occur.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++--
|
||||
1 file changed, 71 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1940,6 +1940,65 @@ static void call_console_drivers(const c
|
||||
}
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PRINTK_NMI
|
||||
+#define NUM_RECURSION_CTX 2
|
||||
+#else
|
||||
+#define NUM_RECURSION_CTX 1
|
||||
+#endif
|
||||
+
|
||||
+struct printk_recursion {
|
||||
+ char count[NUM_RECURSION_CTX];
|
||||
+};
|
||||
+
|
||||
+static DEFINE_PER_CPU(struct printk_recursion, percpu_printk_recursion);
|
||||
+static char printk_recursion_count[NUM_RECURSION_CTX];
|
||||
+
|
||||
+static char *printk_recursion_counter(void)
|
||||
+{
|
||||
+ struct printk_recursion *rec;
|
||||
+ char *count;
|
||||
+
|
||||
+ if (!printk_percpu_data_ready()) {
|
||||
+ count = &printk_recursion_count[0];
|
||||
+ } else {
|
||||
+ rec = this_cpu_ptr(&percpu_printk_recursion);
|
||||
+
|
||||
+ count = &rec->count[0];
|
||||
+ }
|
||||
+
|
||||
+#ifdef CONFIG_PRINTK_NMI
|
||||
+ if (in_nmi())
|
||||
+ count++;
|
||||
+#endif
|
||||
+
|
||||
+ return count;
|
||||
+}
|
||||
+
|
||||
+static bool printk_enter_irqsave(unsigned long *flags)
|
||||
+{
|
||||
+ char *count;
|
||||
+
|
||||
+ local_irq_save(*flags);
|
||||
+ count = printk_recursion_counter();
|
||||
+ /* Only 1 level of recursion allowed. */
|
||||
+ if (*count > 1) {
|
||||
+ local_irq_restore(*flags);
|
||||
+ return false;
|
||||
+ }
|
||||
+ (*count)++;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static void printk_exit_irqrestore(unsigned long flags)
|
||||
+{
|
||||
+ char *count;
|
||||
+
|
||||
+ count = printk_recursion_counter();
|
||||
+ (*count)--;
|
||||
+ local_irq_restore(flags);
|
||||
+}
|
||||
+
|
||||
int printk_delay_msec __read_mostly;
|
||||
|
||||
static inline void printk_delay(void)
|
||||
@@ -2040,11 +2099,13 @@ int vprintk_store(int facility, int leve
|
||||
struct prb_reserved_entry e;
|
||||
enum log_flags lflags = 0;
|
||||
struct printk_record r;
|
||||
+ unsigned long irqflags;
|
||||
u16 trunc_msg_len = 0;
|
||||
char prefix_buf[8];
|
||||
u16 reserve_size;
|
||||
va_list args2;
|
||||
u16 text_len;
|
||||
+ int ret = 0;
|
||||
u64 ts_nsec;
|
||||
|
||||
/*
|
||||
@@ -2055,6 +2116,9 @@ int vprintk_store(int facility, int leve
|
||||
*/
|
||||
ts_nsec = local_clock();
|
||||
|
||||
+ if (!printk_enter_irqsave(&irqflags))
|
||||
+ return 0;
|
||||
+
|
||||
/*
|
||||
* The sprintf needs to come first since the syslog prefix might be
|
||||
* passed in as a parameter. An extra byte must be reserved so that
|
||||
@@ -2092,7 +2156,8 @@ int vprintk_store(int facility, int leve
|
||||
prb_commit(&e);
|
||||
}
|
||||
|
||||
- return text_len;
|
||||
+ ret = text_len;
|
||||
+ goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2108,7 +2173,7 @@ int vprintk_store(int facility, int leve
|
||||
|
||||
prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
|
||||
if (!prb_reserve(&e, prb, &r))
|
||||
- return 0;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
/* fill message */
|
||||
@@ -2130,7 +2195,10 @@ int vprintk_store(int facility, int leve
|
||||
else
|
||||
prb_final_commit(&e);
|
||||
|
||||
- return (text_len + trunc_msg_len);
|
||||
+ ret = text_len + trunc_msg_len;
|
||||
+out:
|
||||
+ printk_exit_irqrestore(irqflags);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
asmlinkage int vprintk_emit(int facility, int level,
|
||||
@@ -0,0 +1,854 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:00 +0106
|
||||
Subject: [PATCH 17/28] printk: remove safe buffers
|
||||
|
||||
With @logbuf_lock removed, the high level printk functions for
|
||||
storing messages are lockless. Messages can be stored from any
|
||||
context, so there is no need for the NMI and safe buffers anymore.
|
||||
|
||||
Remove the NMI and safe buffers. In NMI or safe contexts, store
|
||||
the message immediately but still use irq_work to defer the console
|
||||
printing.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/powerpc/kernel/traps.c | 1
|
||||
arch/powerpc/kernel/watchdog.c | 5
|
||||
include/linux/printk.h | 10 -
|
||||
kernel/kexec_core.c | 1
|
||||
kernel/panic.c | 3
|
||||
kernel/printk/internal.h | 2
|
||||
kernel/printk/printk.c | 85 +---------
|
||||
kernel/printk/printk_safe.c | 329 -----------------------------------------
|
||||
lib/nmi_backtrace.c | 6
|
||||
9 files changed, 17 insertions(+), 425 deletions(-)
|
||||
|
||||
--- a/arch/powerpc/kernel/traps.c
|
||||
+++ b/arch/powerpc/kernel/traps.c
|
||||
@@ -170,7 +170,6 @@ extern void panic_flush_kmsg_start(void)
|
||||
|
||||
extern void panic_flush_kmsg_end(void)
|
||||
{
|
||||
- printk_safe_flush_on_panic();
|
||||
kmsg_dump(KMSG_DUMP_PANIC);
|
||||
bust_spinlocks(0);
|
||||
debug_locks_off();
|
||||
--- a/arch/powerpc/kernel/watchdog.c
|
||||
+++ b/arch/powerpc/kernel/watchdog.c
|
||||
@@ -181,11 +181,6 @@ static void watchdog_smp_panic(int cpu,
|
||||
|
||||
wd_smp_unlock(&flags);
|
||||
|
||||
- printk_safe_flush();
|
||||
- /*
|
||||
- * printk_safe_flush() seems to require another print
|
||||
- * before anything actually goes out to console.
|
||||
- */
|
||||
if (sysctl_hardlockup_all_cpu_backtrace)
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -207,8 +207,6 @@ void __init setup_log_buf(int early);
|
||||
void dump_stack_print_info(const char *log_lvl);
|
||||
void show_regs_print_info(const char *log_lvl);
|
||||
extern asmlinkage void dump_stack(void) __cold;
|
||||
-extern void printk_safe_flush(void);
|
||||
-extern void printk_safe_flush_on_panic(void);
|
||||
#else
|
||||
static inline __printf(1, 0)
|
||||
int vprintk(const char *s, va_list args)
|
||||
@@ -272,14 +270,6 @@ static inline void show_regs_print_info(
|
||||
static inline void dump_stack(void)
|
||||
{
|
||||
}
|
||||
-
|
||||
-static inline void printk_safe_flush(void)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
-static inline void printk_safe_flush_on_panic(void)
|
||||
-{
|
||||
-}
|
||||
#endif
|
||||
|
||||
extern int kptr_restrict;
|
||||
--- a/kernel/kexec_core.c
|
||||
+++ b/kernel/kexec_core.c
|
||||
@@ -977,7 +977,6 @@ void crash_kexec(struct pt_regs *regs)
|
||||
old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
|
||||
if (old_cpu == PANIC_CPU_INVALID) {
|
||||
/* This is the 1st CPU which comes here, so go ahead. */
|
||||
- printk_safe_flush_on_panic();
|
||||
__crash_kexec(regs);
|
||||
|
||||
/*
|
||||
--- a/kernel/panic.c
|
||||
+++ b/kernel/panic.c
|
||||
@@ -247,7 +247,6 @@ void panic(const char *fmt, ...)
|
||||
* Bypass the panic_cpu check and call __crash_kexec directly.
|
||||
*/
|
||||
if (!_crash_kexec_post_notifiers) {
|
||||
- printk_safe_flush_on_panic();
|
||||
__crash_kexec(NULL);
|
||||
|
||||
/*
|
||||
@@ -271,8 +270,6 @@ void panic(const char *fmt, ...)
|
||||
*/
|
||||
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
|
||||
|
||||
- /* Call flush even twice. It tries harder with a single online CPU */
|
||||
- printk_safe_flush_on_panic();
|
||||
kmsg_dump(KMSG_DUMP_PANIC);
|
||||
|
||||
/*
|
||||
--- a/kernel/printk/internal.h
|
||||
+++ b/kernel/printk/internal.h
|
||||
@@ -23,7 +23,6 @@ int vprintk_store(int facility, int leve
|
||||
void __printk_safe_enter(void);
|
||||
void __printk_safe_exit(void);
|
||||
|
||||
-void printk_safe_init(void);
|
||||
bool printk_percpu_data_ready(void);
|
||||
|
||||
#define printk_safe_enter_irqsave(flags) \
|
||||
@@ -67,6 +66,5 @@ void defer_console_output(void);
|
||||
#define printk_safe_enter_irq() local_irq_disable()
|
||||
#define printk_safe_exit_irq() local_irq_enable()
|
||||
|
||||
-static inline void printk_safe_init(void) { }
|
||||
static inline bool printk_percpu_data_ready(void) { return false; }
|
||||
#endif /* CONFIG_PRINTK */
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -732,27 +732,22 @@ static ssize_t devkmsg_read(struct file
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- printk_safe_enter_irq();
|
||||
if (!prb_read_valid(prb, atomic64_read(&user->seq), r)) {
|
||||
if (file->f_flags & O_NONBLOCK) {
|
||||
ret = -EAGAIN;
|
||||
- printk_safe_exit_irq();
|
||||
goto out;
|
||||
}
|
||||
|
||||
- printk_safe_exit_irq();
|
||||
ret = wait_event_interruptible(log_wait,
|
||||
prb_read_valid(prb, atomic64_read(&user->seq), r));
|
||||
if (ret)
|
||||
goto out;
|
||||
- printk_safe_enter_irq();
|
||||
}
|
||||
|
||||
if (r->info->seq != atomic64_read(&user->seq)) {
|
||||
/* our last seen message is gone, return error and reset */
|
||||
atomic64_set(&user->seq, r->info->seq);
|
||||
ret = -EPIPE;
|
||||
- printk_safe_exit_irq();
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -762,7 +757,6 @@ static ssize_t devkmsg_read(struct file
|
||||
&r->info->dev_info);
|
||||
|
||||
atomic64_set(&user->seq, r->info->seq + 1);
|
||||
- printk_safe_exit_irq();
|
||||
|
||||
if (len > count) {
|
||||
ret = -EINVAL;
|
||||
@@ -797,7 +791,6 @@ static loff_t devkmsg_llseek(struct file
|
||||
if (offset)
|
||||
return -ESPIPE;
|
||||
|
||||
- printk_safe_enter_irq();
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
/* the first record */
|
||||
@@ -818,7 +811,6 @@ static loff_t devkmsg_llseek(struct file
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
- printk_safe_exit_irq();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -833,7 +825,6 @@ static __poll_t devkmsg_poll(struct file
|
||||
|
||||
poll_wait(file, &log_wait, wait);
|
||||
|
||||
- printk_safe_enter_irq();
|
||||
if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
|
||||
/* return error when data has vanished underneath us */
|
||||
if (info.seq != atomic64_read(&user->seq))
|
||||
@@ -841,7 +832,6 @@ static __poll_t devkmsg_poll(struct file
|
||||
else
|
||||
ret = EPOLLIN|EPOLLRDNORM;
|
||||
}
|
||||
- printk_safe_exit_irq();
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -874,9 +864,7 @@ static int devkmsg_open(struct inode *in
|
||||
prb_rec_init_rd(&user->record, &user->info,
|
||||
&user->text_buf[0], sizeof(user->text_buf));
|
||||
|
||||
- printk_safe_enter_irq();
|
||||
atomic64_set(&user->seq, prb_first_valid_seq(prb));
|
||||
- printk_safe_exit_irq();
|
||||
|
||||
file->private_data = user;
|
||||
return 0;
|
||||
@@ -1042,9 +1030,6 @@ static inline void log_buf_add_cpu(void)
|
||||
|
||||
static void __init set_percpu_data_ready(void)
|
||||
{
|
||||
- printk_safe_init();
|
||||
- /* Make sure we set this flag only after printk_safe() init is done */
|
||||
- barrier();
|
||||
__printk_percpu_data_ready = true;
|
||||
}
|
||||
|
||||
@@ -1142,8 +1127,6 @@ void __init setup_log_buf(int early)
|
||||
new_descs, ilog2(new_descs_count),
|
||||
new_infos);
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
-
|
||||
log_buf_len = new_log_buf_len;
|
||||
log_buf = new_log_buf;
|
||||
new_log_buf_len = 0;
|
||||
@@ -1159,8 +1142,6 @@ void __init setup_log_buf(int early)
|
||||
*/
|
||||
prb = &printk_rb_dynamic;
|
||||
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
if (seq != prb_next_seq(&printk_rb_static)) {
|
||||
pr_err("dropped %llu messages\n",
|
||||
prb_next_seq(&printk_rb_static) - seq);
|
||||
@@ -1498,11 +1479,9 @@ static int syslog_print(char __user *buf
|
||||
size_t n;
|
||||
size_t skip;
|
||||
|
||||
- printk_safe_enter_irq();
|
||||
- raw_spin_lock(&syslog_lock);
|
||||
+ raw_spin_lock_irq(&syslog_lock);
|
||||
if (!prb_read_valid(prb, syslog_seq, &r)) {
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
- printk_safe_exit_irq();
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
break;
|
||||
}
|
||||
if (r.info->seq != syslog_seq) {
|
||||
@@ -1531,8 +1510,7 @@ static int syslog_print(char __user *buf
|
||||
syslog_partial += n;
|
||||
} else
|
||||
n = 0;
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
- printk_safe_exit_irq();
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
|
||||
if (!n)
|
||||
break;
|
||||
@@ -1566,7 +1544,6 @@ static int syslog_print_all(char __user
|
||||
return -ENOMEM;
|
||||
|
||||
time = printk_time;
|
||||
- printk_safe_enter_irq();
|
||||
/*
|
||||
* Find first record that fits, including all following records,
|
||||
* into the user-provided buffer for this dump.
|
||||
@@ -1587,23 +1564,20 @@ static int syslog_print_all(char __user
|
||||
break;
|
||||
}
|
||||
|
||||
- printk_safe_exit_irq();
|
||||
if (copy_to_user(buf + len, text, textlen))
|
||||
len = -EFAULT;
|
||||
else
|
||||
len += textlen;
|
||||
- printk_safe_enter_irq();
|
||||
|
||||
if (len < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (clear) {
|
||||
- raw_spin_lock(&syslog_lock);
|
||||
+ raw_spin_lock_irq(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, seq);
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
}
|
||||
- printk_safe_exit_irq();
|
||||
|
||||
kfree(text);
|
||||
return len;
|
||||
@@ -1611,11 +1585,9 @@ static int syslog_print_all(char __user
|
||||
|
||||
static void syslog_clear(void)
|
||||
{
|
||||
- printk_safe_enter_irq();
|
||||
- raw_spin_lock(&syslog_lock);
|
||||
+ raw_spin_lock_irq(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, prb_next_seq(prb));
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
- printk_safe_exit_irq();
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
}
|
||||
|
||||
/* Return a consistent copy of @syslog_seq. */
|
||||
@@ -1703,12 +1675,10 @@ int do_syslog(int type, char __user *buf
|
||||
break;
|
||||
/* Number of chars in the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_UNREAD:
|
||||
- printk_safe_enter_irq();
|
||||
- raw_spin_lock(&syslog_lock);
|
||||
+ raw_spin_lock_irq(&syslog_lock);
|
||||
if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
|
||||
/* No unread messages. */
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
- printk_safe_exit_irq();
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
return 0;
|
||||
}
|
||||
if (info.seq != syslog_seq) {
|
||||
@@ -1736,8 +1706,7 @@ int do_syslog(int type, char __user *buf
|
||||
}
|
||||
error -= syslog_partial;
|
||||
}
|
||||
- raw_spin_unlock(&syslog_lock);
|
||||
- printk_safe_exit_irq();
|
||||
+ raw_spin_unlock_irq(&syslog_lock);
|
||||
break;
|
||||
/* Size of the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_BUFFER:
|
||||
@@ -2207,7 +2176,6 @@ asmlinkage int vprintk_emit(int facility
|
||||
{
|
||||
int printed_len;
|
||||
bool in_sched = false;
|
||||
- unsigned long flags;
|
||||
|
||||
/* Suppress unimportant messages after panic happens */
|
||||
if (unlikely(suppress_printk))
|
||||
@@ -2221,9 +2189,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
boot_delay_msec(level);
|
||||
printk_delay();
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
|
||||
/* If called from the scheduler, we can not call up(). */
|
||||
if (!in_sched) {
|
||||
@@ -2615,7 +2581,6 @@ void console_unlock(void)
|
||||
{
|
||||
static char ext_text[CONSOLE_EXT_LOG_MAX];
|
||||
static char text[CONSOLE_LOG_MAX];
|
||||
- unsigned long flags;
|
||||
bool do_cond_resched, retry;
|
||||
struct printk_info info;
|
||||
struct printk_record r;
|
||||
@@ -2660,7 +2625,6 @@ void console_unlock(void)
|
||||
size_t ext_len = 0;
|
||||
size_t len;
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
skip:
|
||||
if (!prb_read_valid(prb, console_seq, &r))
|
||||
break;
|
||||
@@ -2717,12 +2681,8 @@ void console_unlock(void)
|
||||
call_console_drivers(ext_text, ext_len, text, len);
|
||||
start_critical_timings();
|
||||
|
||||
- if (console_lock_spinning_disable_and_check()) {
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
+ if (console_lock_spinning_disable_and_check())
|
||||
return;
|
||||
- }
|
||||
-
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
|
||||
if (do_cond_resched)
|
||||
cond_resched();
|
||||
@@ -2739,8 +2699,6 @@ void console_unlock(void)
|
||||
* flush, no worries.
|
||||
*/
|
||||
retry = prb_read_valid(prb, console_seq, NULL);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
if (retry && console_trylock())
|
||||
goto again;
|
||||
}
|
||||
@@ -2802,13 +2760,8 @@ void console_flush_on_panic(enum con_flu
|
||||
console_trylock();
|
||||
console_may_schedule = 0;
|
||||
|
||||
- if (mode == CONSOLE_REPLAY_ALL) {
|
||||
- unsigned long flags;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
+ if (mode == CONSOLE_REPLAY_ALL)
|
||||
console_seq = prb_first_valid_seq(prb);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- }
|
||||
console_unlock();
|
||||
}
|
||||
|
||||
@@ -3464,11 +3417,9 @@ bool kmsg_dump_get_line(struct kmsg_dump
|
||||
struct printk_info info;
|
||||
unsigned int line_count;
|
||||
struct printk_record r;
|
||||
- unsigned long flags;
|
||||
size_t l = 0;
|
||||
bool ret = false;
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
prb_rec_init_rd(&r, &info, line, size);
|
||||
|
||||
if (!iter->active)
|
||||
@@ -3492,7 +3443,6 @@ bool kmsg_dump_get_line(struct kmsg_dump
|
||||
iter->cur_seq = r.info->seq + 1;
|
||||
ret = true;
|
||||
out:
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
if (len)
|
||||
*len = l;
|
||||
return ret;
|
||||
@@ -3523,7 +3473,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
{
|
||||
struct printk_info info;
|
||||
struct printk_record r;
|
||||
- unsigned long flags;
|
||||
u64 seq;
|
||||
u64 next_seq;
|
||||
size_t len = 0;
|
||||
@@ -3533,7 +3482,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
if (!iter->active || !buf || !size)
|
||||
goto out;
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) {
|
||||
if (info.seq != iter->cur_seq) {
|
||||
/* messages are gone, move to first available one */
|
||||
@@ -3542,10 +3490,8 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
}
|
||||
|
||||
/* last entry */
|
||||
- if (iter->cur_seq >= iter->next_seq) {
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
+ if (iter->cur_seq >= iter->next_seq)
|
||||
goto out;
|
||||
- }
|
||||
|
||||
/*
|
||||
* Find first record that fits, including all following records,
|
||||
@@ -3577,7 +3523,6 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
|
||||
iter->next_seq = next_seq;
|
||||
ret = true;
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
out:
|
||||
if (len_out)
|
||||
*len_out = len;
|
||||
@@ -3595,12 +3540,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
*/
|
||||
void kmsg_dump_rewind(struct kmsg_dumper_iter *iter)
|
||||
{
|
||||
- unsigned long flags;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
iter->cur_seq = latched_seq_read_nolock(&clear_seq);
|
||||
iter->next_seq = prb_next_seq(prb);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
|
||||
--- a/kernel/printk/printk_safe.c
|
||||
+++ b/kernel/printk/printk_safe.c
|
||||
@@ -15,282 +15,9 @@
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
-/*
|
||||
- * In NMI and safe mode, printk() avoids taking locks. Instead,
|
||||
- * it uses an alternative implementation that temporary stores
|
||||
- * the strings into a per-CPU buffer. The content of the buffer
|
||||
- * is later flushed into the main ring buffer via IRQ work.
|
||||
- *
|
||||
- * The alternative implementation is chosen transparently
|
||||
- * by examining current printk() context mask stored in @printk_context
|
||||
- * per-CPU variable.
|
||||
- *
|
||||
- * The implementation allows to flush the strings also from another CPU.
|
||||
- * There are situations when we want to make sure that all buffers
|
||||
- * were handled or when IRQs are blocked.
|
||||
- */
|
||||
-
|
||||
-#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \
|
||||
- sizeof(atomic_t) - \
|
||||
- sizeof(atomic_t) - \
|
||||
- sizeof(struct irq_work))
|
||||
-
|
||||
-struct printk_safe_seq_buf {
|
||||
- atomic_t len; /* length of written data */
|
||||
- atomic_t message_lost;
|
||||
- struct irq_work work; /* IRQ work that flushes the buffer */
|
||||
- unsigned char buffer[SAFE_LOG_BUF_LEN];
|
||||
-};
|
||||
-
|
||||
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
|
||||
static DEFINE_PER_CPU(int, printk_context);
|
||||
|
||||
-static DEFINE_RAW_SPINLOCK(safe_read_lock);
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
|
||||
-#endif
|
||||
-
|
||||
-/* Get flushed in a more safe context. */
|
||||
-static void queue_flush_work(struct printk_safe_seq_buf *s)
|
||||
-{
|
||||
- if (printk_percpu_data_ready())
|
||||
- irq_work_queue(&s->work);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe
|
||||
- * have dedicated buffers, because otherwise printk-safe preempted by
|
||||
- * NMI-printk would have overwritten the NMI messages.
|
||||
- *
|
||||
- * The messages are flushed from irq work (or from panic()), possibly,
|
||||
- * from other CPU, concurrently with printk_safe_log_store(). Should this
|
||||
- * happen, printk_safe_log_store() will notice the buffer->len mismatch
|
||||
- * and repeat the write.
|
||||
- */
|
||||
-static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s,
|
||||
- const char *fmt, va_list args)
|
||||
-{
|
||||
- int add;
|
||||
- size_t len;
|
||||
- va_list ap;
|
||||
-
|
||||
-again:
|
||||
- len = atomic_read(&s->len);
|
||||
-
|
||||
- /* The trailing '\0' is not counted into len. */
|
||||
- if (len >= sizeof(s->buffer) - 1) {
|
||||
- atomic_inc(&s->message_lost);
|
||||
- queue_flush_work(s);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Make sure that all old data have been read before the buffer
|
||||
- * was reset. This is not needed when we just append data.
|
||||
- */
|
||||
- if (!len)
|
||||
- smp_rmb();
|
||||
-
|
||||
- va_copy(ap, args);
|
||||
- add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap);
|
||||
- va_end(ap);
|
||||
- if (!add)
|
||||
- return 0;
|
||||
-
|
||||
- /*
|
||||
- * Do it once again if the buffer has been flushed in the meantime.
|
||||
- * Note that atomic_cmpxchg() is an implicit memory barrier that
|
||||
- * makes sure that the data were written before updating s->len.
|
||||
- */
|
||||
- if (atomic_cmpxchg(&s->len, len, len + add) != len)
|
||||
- goto again;
|
||||
-
|
||||
- queue_flush_work(s);
|
||||
- return add;
|
||||
-}
|
||||
-
|
||||
-static inline void printk_safe_flush_line(const char *text, int len)
|
||||
-{
|
||||
- /*
|
||||
- * Avoid any console drivers calls from here, because we may be
|
||||
- * in NMI or printk_safe context (when in panic). The messages
|
||||
- * must go only into the ring buffer at this stage. Consoles will
|
||||
- * get explicitly called later when a crashdump is not generated.
|
||||
- */
|
||||
- printk_deferred("%.*s", len, text);
|
||||
-}
|
||||
-
|
||||
-/* printk part of the temporary buffer line by line */
|
||||
-static int printk_safe_flush_buffer(const char *start, size_t len)
|
||||
-{
|
||||
- const char *c, *end;
|
||||
- bool header;
|
||||
-
|
||||
- c = start;
|
||||
- end = start + len;
|
||||
- header = true;
|
||||
-
|
||||
- /* Print line by line. */
|
||||
- while (c < end) {
|
||||
- if (*c == '\n') {
|
||||
- printk_safe_flush_line(start, c - start + 1);
|
||||
- start = ++c;
|
||||
- header = true;
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- /* Handle continuous lines or missing new line. */
|
||||
- if ((c + 1 < end) && printk_get_level(c)) {
|
||||
- if (header) {
|
||||
- c = printk_skip_level(c);
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- printk_safe_flush_line(start, c - start);
|
||||
- start = c++;
|
||||
- header = true;
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- header = false;
|
||||
- c++;
|
||||
- }
|
||||
-
|
||||
- /* Check if there was a partial line. Ignore pure header. */
|
||||
- if (start < end && !header) {
|
||||
- static const char newline[] = KERN_CONT "\n";
|
||||
-
|
||||
- printk_safe_flush_line(start, end - start);
|
||||
- printk_safe_flush_line(newline, strlen(newline));
|
||||
- }
|
||||
-
|
||||
- return len;
|
||||
-}
|
||||
-
|
||||
-static void report_message_lost(struct printk_safe_seq_buf *s)
|
||||
-{
|
||||
- int lost = atomic_xchg(&s->message_lost, 0);
|
||||
-
|
||||
- if (lost)
|
||||
- printk_deferred("Lost %d message(s)!\n", lost);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Flush data from the associated per-CPU buffer. The function
|
||||
- * can be called either via IRQ work or independently.
|
||||
- */
|
||||
-static void __printk_safe_flush(struct irq_work *work)
|
||||
-{
|
||||
- struct printk_safe_seq_buf *s =
|
||||
- container_of(work, struct printk_safe_seq_buf, work);
|
||||
- unsigned long flags;
|
||||
- size_t len;
|
||||
- int i;
|
||||
-
|
||||
- /*
|
||||
- * The lock has two functions. First, one reader has to flush all
|
||||
- * available message to make the lockless synchronization with
|
||||
- * writers easier. Second, we do not want to mix messages from
|
||||
- * different CPUs. This is especially important when printing
|
||||
- * a backtrace.
|
||||
- */
|
||||
- raw_spin_lock_irqsave(&safe_read_lock, flags);
|
||||
-
|
||||
- i = 0;
|
||||
-more:
|
||||
- len = atomic_read(&s->len);
|
||||
-
|
||||
- /*
|
||||
- * This is just a paranoid check that nobody has manipulated
|
||||
- * the buffer an unexpected way. If we printed something then
|
||||
- * @len must only increase. Also it should never overflow the
|
||||
- * buffer size.
|
||||
- */
|
||||
- if ((i && i >= len) || len > sizeof(s->buffer)) {
|
||||
- const char *msg = "printk_safe_flush: internal error\n";
|
||||
-
|
||||
- printk_safe_flush_line(msg, strlen(msg));
|
||||
- len = 0;
|
||||
- }
|
||||
-
|
||||
- if (!len)
|
||||
- goto out; /* Someone else has already flushed the buffer. */
|
||||
-
|
||||
- /* Make sure that data has been written up to the @len */
|
||||
- smp_rmb();
|
||||
- i += printk_safe_flush_buffer(s->buffer + i, len - i);
|
||||
-
|
||||
- /*
|
||||
- * Check that nothing has got added in the meantime and truncate
|
||||
- * the buffer. Note that atomic_cmpxchg() is an implicit memory
|
||||
- * barrier that makes sure that the data were copied before
|
||||
- * updating s->len.
|
||||
- */
|
||||
- if (atomic_cmpxchg(&s->len, len, 0) != len)
|
||||
- goto more;
|
||||
-
|
||||
-out:
|
||||
- report_message_lost(s);
|
||||
- raw_spin_unlock_irqrestore(&safe_read_lock, flags);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * printk_safe_flush - flush all per-cpu nmi buffers.
|
||||
- *
|
||||
- * The buffers are flushed automatically via IRQ work. This function
|
||||
- * is useful only when someone wants to be sure that all buffers have
|
||||
- * been flushed at some point.
|
||||
- */
|
||||
-void printk_safe_flush(void)
|
||||
-{
|
||||
- int cpu;
|
||||
-
|
||||
- for_each_possible_cpu(cpu) {
|
||||
#ifdef CONFIG_PRINTK_NMI
|
||||
- __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work);
|
||||
-#endif
|
||||
- __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system
|
||||
- * goes down.
|
||||
- *
|
||||
- * Similar to printk_safe_flush() but it can be called even in NMI context when
|
||||
- * the system goes down. It does the best effort to get NMI messages into
|
||||
- * the main ring buffer.
|
||||
- *
|
||||
- * Note that it could try harder when there is only one CPU online.
|
||||
- */
|
||||
-void printk_safe_flush_on_panic(void)
|
||||
-{
|
||||
- if (raw_spin_is_locked(&safe_read_lock)) {
|
||||
- if (num_online_cpus() > 1)
|
||||
- return;
|
||||
-
|
||||
- debug_locks_off();
|
||||
- raw_spin_lock_init(&safe_read_lock);
|
||||
- }
|
||||
-
|
||||
- printk_safe_flush();
|
||||
-}
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-/*
|
||||
- * Safe printk() for NMI context. It uses a per-CPU buffer to
|
||||
- * store the message. NMIs are not nested, so there is always only
|
||||
- * one writer running. But the buffer might get flushed from another
|
||||
- * CPU, so we need to be careful.
|
||||
- */
|
||||
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
|
||||
-{
|
||||
- struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
|
||||
-
|
||||
- return printk_safe_log_store(s, fmt, args);
|
||||
-}
|
||||
-
|
||||
void noinstr printk_nmi_enter(void)
|
||||
{
|
||||
this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
|
||||
@@ -305,9 +32,6 @@ void noinstr printk_nmi_exit(void)
|
||||
* Marks a code that might produce many messages in NMI context
|
||||
* and the risk of losing them is more critical than eventual
|
||||
* reordering.
|
||||
- *
|
||||
- * It has effect only when called in NMI context. Then printk()
|
||||
- * will store the messages into the main logbuf directly.
|
||||
*/
|
||||
void printk_nmi_direct_enter(void)
|
||||
{
|
||||
@@ -320,27 +44,8 @@ void printk_nmi_direct_exit(void)
|
||||
this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
|
||||
}
|
||||
|
||||
-#else
|
||||
-
|
||||
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
#endif /* CONFIG_PRINTK_NMI */
|
||||
|
||||
-/*
|
||||
- * Lock-less printk(), to avoid deadlocks should the printk() recurse
|
||||
- * into itself. It uses a per-CPU buffer to store the message, just like
|
||||
- * NMI.
|
||||
- */
|
||||
-static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args)
|
||||
-{
|
||||
- struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq);
|
||||
-
|
||||
- return printk_safe_log_store(s, fmt, args);
|
||||
-}
|
||||
-
|
||||
/* Can be preempted by NMI. */
|
||||
void __printk_safe_enter(void)
|
||||
{
|
||||
@@ -365,8 +70,10 @@ void __printk_safe_exit(void)
|
||||
* Use the main logbuf even in NMI. But avoid calling console
|
||||
* drivers that might have their own locks.
|
||||
*/
|
||||
- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK)) {
|
||||
- unsigned long flags;
|
||||
+ if (this_cpu_read(printk_context) &
|
||||
+ (PRINTK_NMI_DIRECT_CONTEXT_MASK |
|
||||
+ PRINTK_NMI_CONTEXT_MASK |
|
||||
+ PRINTK_SAFE_CONTEXT_MASK)) {
|
||||
int len;
|
||||
|
||||
printk_safe_enter_irqsave(flags);
|
||||
@@ -376,34 +83,6 @@ void __printk_safe_exit(void)
|
||||
return len;
|
||||
}
|
||||
|
||||
- /* Use extra buffer in NMI. */
|
||||
- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
|
||||
- return vprintk_nmi(fmt, args);
|
||||
-
|
||||
- /* Use extra buffer to prevent a recursion deadlock in safe mode. */
|
||||
- if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
|
||||
- return vprintk_safe(fmt, args);
|
||||
-
|
||||
/* No obstacles. */
|
||||
return vprintk_default(fmt, args);
|
||||
}
|
||||
-
|
||||
-void __init printk_safe_init(void)
|
||||
-{
|
||||
- int cpu;
|
||||
-
|
||||
- for_each_possible_cpu(cpu) {
|
||||
- struct printk_safe_seq_buf *s;
|
||||
-
|
||||
- s = &per_cpu(safe_print_seq, cpu);
|
||||
- init_irq_work(&s->work, __printk_safe_flush);
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
- s = &per_cpu(nmi_print_seq, cpu);
|
||||
- init_irq_work(&s->work, __printk_safe_flush);
|
||||
-#endif
|
||||
- }
|
||||
-
|
||||
- /* Flush pending messages that did not have scheduled IRQ works. */
|
||||
- printk_safe_flush();
|
||||
-}
|
||||
--- a/lib/nmi_backtrace.c
|
||||
+++ b/lib/nmi_backtrace.c
|
||||
@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const
|
||||
touch_softlockup_watchdog();
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Force flush any remote buffers that might be stuck in IRQ context
|
||||
- * and therefore could not run their irq_work.
|
||||
- */
|
||||
- printk_safe_flush();
|
||||
-
|
||||
clear_bit_unlock(0, &backtrace_flag);
|
||||
put_cpu();
|
||||
}
|
||||
@@ -0,0 +1,112 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Thu, 18 Feb 2021 17:37:41 +0100
|
||||
Subject: [PATCH 18/28] printk: convert @syslog_lock to spin_lock
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 30 +++++++++++++++---------------
|
||||
1 file changed, 15 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -356,7 +356,7 @@ enum log_flags {
|
||||
};
|
||||
|
||||
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
||||
-static DEFINE_RAW_SPINLOCK(syslog_lock);
|
||||
+static DEFINE_SPINLOCK(syslog_lock);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
@@ -1479,9 +1479,9 @@ static int syslog_print(char __user *buf
|
||||
size_t n;
|
||||
size_t skip;
|
||||
|
||||
- raw_spin_lock_irq(&syslog_lock);
|
||||
+ spin_lock_irq(&syslog_lock);
|
||||
if (!prb_read_valid(prb, syslog_seq, &r)) {
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
break;
|
||||
}
|
||||
if (r.info->seq != syslog_seq) {
|
||||
@@ -1510,7 +1510,7 @@ static int syslog_print(char __user *buf
|
||||
syslog_partial += n;
|
||||
} else
|
||||
n = 0;
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
|
||||
if (!n)
|
||||
break;
|
||||
@@ -1574,9 +1574,9 @@ static int syslog_print_all(char __user
|
||||
}
|
||||
|
||||
if (clear) {
|
||||
- raw_spin_lock_irq(&syslog_lock);
|
||||
+ spin_lock_irq(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, seq);
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
}
|
||||
|
||||
kfree(text);
|
||||
@@ -1585,9 +1585,9 @@ static int syslog_print_all(char __user
|
||||
|
||||
static void syslog_clear(void)
|
||||
{
|
||||
- raw_spin_lock_irq(&syslog_lock);
|
||||
+ spin_lock_irq(&syslog_lock);
|
||||
latched_seq_write(&clear_seq, prb_next_seq(prb));
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
}
|
||||
|
||||
/* Return a consistent copy of @syslog_seq. */
|
||||
@@ -1595,9 +1595,9 @@ static u64 read_syslog_seq_irq(void)
|
||||
{
|
||||
u64 seq;
|
||||
|
||||
- raw_spin_lock_irq(&syslog_lock);
|
||||
+ spin_lock_irq(&syslog_lock);
|
||||
seq = syslog_seq;
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
|
||||
return seq;
|
||||
}
|
||||
@@ -1675,10 +1675,10 @@ int do_syslog(int type, char __user *buf
|
||||
break;
|
||||
/* Number of chars in the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_UNREAD:
|
||||
- raw_spin_lock_irq(&syslog_lock);
|
||||
+ spin_lock_irq(&syslog_lock);
|
||||
if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
|
||||
/* No unread messages. */
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
return 0;
|
||||
}
|
||||
if (info.seq != syslog_seq) {
|
||||
@@ -1706,7 +1706,7 @@ int do_syslog(int type, char __user *buf
|
||||
}
|
||||
error -= syslog_partial;
|
||||
}
|
||||
- raw_spin_unlock_irq(&syslog_lock);
|
||||
+ spin_unlock_irq(&syslog_lock);
|
||||
break;
|
||||
/* Size of the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_BUFFER:
|
||||
@@ -3001,9 +3001,9 @@ void register_console(struct console *ne
|
||||
exclusive_console_stop_seq = console_seq;
|
||||
|
||||
/* Get a consistent copy of @syslog_seq. */
|
||||
- raw_spin_lock_irqsave(&syslog_lock, flags);
|
||||
+ spin_lock_irqsave(&syslog_lock, flags);
|
||||
console_seq = syslog_seq;
|
||||
- raw_spin_unlock_irqrestore(&syslog_lock, flags);
|
||||
+ spin_unlock_irqrestore(&syslog_lock, flags);
|
||||
}
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
@@ -0,0 +1,154 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:01 +0106
|
||||
Subject: [PATCH 19/28] console: add write_atomic interface
|
||||
|
||||
Add a write_atomic() callback to the console. This is an optional
|
||||
function for console drivers. The function must be atomic (including
|
||||
NMI safe) for writing to the console.
|
||||
|
||||
Console drivers must still implement the write() callback. The
|
||||
write_atomic() callback will only be used in special situations,
|
||||
such as when the kernel panics.
|
||||
|
||||
Creating an NMI safe write_atomic() that must synchronize with
|
||||
write() requires a careful implementation of the console driver. To
|
||||
aid with the implementation, a set of console_atomic_*() functions
|
||||
are provided:
|
||||
|
||||
void console_atomic_lock(unsigned int *flags);
|
||||
void console_atomic_unlock(unsigned int flags);
|
||||
|
||||
These functions synchronize using a processor-reentrant spinlock
|
||||
(called a cpulock).
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/console.h | 4 +
|
||||
kernel/printk/printk.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 104 insertions(+)
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -140,6 +140,7 @@ static inline int con_debug_leave(void)
|
||||
struct console {
|
||||
char name[16];
|
||||
void (*write)(struct console *, const char *, unsigned);
|
||||
+ void (*write_atomic)(struct console *co, const char *s, unsigned int count);
|
||||
int (*read)(struct console *, char *, unsigned);
|
||||
struct tty_driver *(*device)(struct console *, int *);
|
||||
void (*unblank)(void);
|
||||
@@ -229,4 +230,7 @@ extern void console_init(void);
|
||||
void dummycon_register_output_notifier(struct notifier_block *nb);
|
||||
void dummycon_unregister_output_notifier(struct notifier_block *nb);
|
||||
|
||||
+extern void console_atomic_lock(unsigned int *flags);
|
||||
+extern void console_atomic_unlock(unsigned int flags);
|
||||
+
|
||||
#endif /* _LINUX_CONSOLE_H */
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3546,3 +3546,103 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
|
||||
#endif
|
||||
+
|
||||
+struct prb_cpulock {
|
||||
+ atomic_t owner;
|
||||
+ unsigned long __percpu *irqflags;
|
||||
+};
|
||||
+
|
||||
+#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
|
||||
+static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
|
||||
+static struct prb_cpulock name = { \
|
||||
+ .owner = ATOMIC_INIT(-1), \
|
||||
+ .irqflags = &_##name##_percpu_irqflags, \
|
||||
+}
|
||||
+
|
||||
+static bool __prb_trylock(struct prb_cpulock *cpu_lock,
|
||||
+ unsigned int *cpu_store)
|
||||
+{
|
||||
+ unsigned long *flags;
|
||||
+ unsigned int cpu;
|
||||
+
|
||||
+ cpu = get_cpu();
|
||||
+
|
||||
+ *cpu_store = atomic_read(&cpu_lock->owner);
|
||||
+ /* memory barrier to ensure the current lock owner is visible */
|
||||
+ smp_rmb();
|
||||
+ if (*cpu_store == -1) {
|
||||
+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
|
||||
+ local_irq_save(*flags);
|
||||
+ if (atomic_try_cmpxchg_acquire(&cpu_lock->owner,
|
||||
+ cpu_store, cpu)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ local_irq_restore(*flags);
|
||||
+ } else if (*cpu_store == cpu) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ put_cpu();
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_lock: Perform a processor-reentrant spin lock.
|
||||
+ * @cpu_lock: A pointer to the lock object.
|
||||
+ * @cpu_store: A "flags" pointer to store lock status information.
|
||||
+ *
|
||||
+ * If no processor has the lock, the calling processor takes the lock and
|
||||
+ * becomes the owner. If the calling processor is already the owner of the
|
||||
+ * lock, this function succeeds immediately. If the lock is held by another
|
||||
+ * processor, this function spins until the calling processor becomes the
|
||||
+ * owner.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+static void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store)
|
||||
+{
|
||||
+ for (;;) {
|
||||
+ if (__prb_trylock(cpu_lock, cpu_store))
|
||||
+ break;
|
||||
+ cpu_relax();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_unlock: Perform a processor-reentrant spin unlock.
|
||||
+ * @cpu_lock: A pointer to the lock object.
|
||||
+ * @cpu_store: A "flags" object storing lock status information.
|
||||
+ *
|
||||
+ * Release the lock. The calling processor must be the owner of the lock.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+static void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store)
|
||||
+{
|
||||
+ unsigned long *flags;
|
||||
+ unsigned int cpu;
|
||||
+
|
||||
+ cpu = atomic_read(&cpu_lock->owner);
|
||||
+ atomic_set_release(&cpu_lock->owner, cpu_store);
|
||||
+
|
||||
+ if (cpu_store == -1) {
|
||||
+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
|
||||
+ local_irq_restore(*flags);
|
||||
+ }
|
||||
+
|
||||
+ put_cpu();
|
||||
+}
|
||||
+
|
||||
+DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
|
||||
+
|
||||
+void console_atomic_lock(unsigned int *flags)
|
||||
+{
|
||||
+ prb_lock(&printk_cpulock, flags);
|
||||
+}
|
||||
+EXPORT_SYMBOL(console_atomic_lock);
|
||||
+
|
||||
+void console_atomic_unlock(unsigned int flags)
|
||||
+{
|
||||
+ prb_unlock(&printk_cpulock, flags);
|
||||
+}
|
||||
+EXPORT_SYMBOL(console_atomic_unlock);
|
||||
@@ -1,83 +1,99 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:58 +0100
|
||||
Subject: [PATCH 20/25] serial: 8250: implement write_atomic
|
||||
Date: Mon, 30 Nov 2020 01:42:02 +0106
|
||||
Subject: [PATCH 20/28] serial: 8250: implement write_atomic
|
||||
|
||||
Implement a non-sleeping NMI-safe write_atomic console function in
|
||||
order to support emergency printk messages.
|
||||
Implement a non-sleeping NMI-safe write_atomic() console function in
|
||||
order to support emergency console printing.
|
||||
|
||||
Since interrupts need to be disabled during transmit, all usage of
|
||||
the IER register was wrapped with access functions that use the
|
||||
console_atomic_lock function to synchronize register access while
|
||||
tracking the state of the interrupts. This was necessary because
|
||||
write_atomic is can be calling from an NMI context that has
|
||||
preempted write_atomic.
|
||||
the IER register is wrapped with access functions that use the
|
||||
console_atomic_lock() function to synchronize register access while
|
||||
tracking the state of the interrupts. This is necessary because
|
||||
write_atomic() can be called from an NMI context that has preempted
|
||||
write_atomic().
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/tty/serial/8250/8250.h | 22 +++++
|
||||
drivers/tty/serial/8250/8250_core.c | 19 +++-
|
||||
drivers/tty/serial/8250/8250_dma.c | 4
|
||||
drivers/tty/serial/8250/8250_port.c | 154 ++++++++++++++++++++++++++----------
|
||||
include/linux/serial_8250.h | 5 +
|
||||
5 files changed, 157 insertions(+), 47 deletions(-)
|
||||
drivers/tty/serial/8250/8250.h | 47 ++++++++++++++++
|
||||
drivers/tty/serial/8250/8250_core.c | 17 ++++--
|
||||
drivers/tty/serial/8250/8250_fsl.c | 9 +++
|
||||
drivers/tty/serial/8250/8250_ingenic.c | 7 ++
|
||||
drivers/tty/serial/8250/8250_mtk.c | 29 +++++++++-
|
||||
drivers/tty/serial/8250/8250_port.c | 92 ++++++++++++++++++++-------------
|
||||
include/linux/serial_8250.h | 5 +
|
||||
7 files changed, 162 insertions(+), 44 deletions(-)
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250.h
|
||||
+++ b/drivers/tty/serial/8250/8250.h
|
||||
@@ -96,6 +96,10 @@ struct serial8250_config {
|
||||
#define SERIAL8250_SHARE_IRQS 0
|
||||
#endif
|
||||
|
||||
+void set_ier(struct uart_8250_port *up, unsigned char ier);
|
||||
+void clear_ier(struct uart_8250_port *up);
|
||||
+void restore_ier(struct uart_8250_port *up);
|
||||
+
|
||||
#define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \
|
||||
{ \
|
||||
.iobase = _base, \
|
||||
@@ -139,6 +143,15 @@ static inline bool serial8250_set_THRI(s
|
||||
return true;
|
||||
@@ -130,12 +130,55 @@ static inline void serial_dl_write(struc
|
||||
up->dl_write(up, value);
|
||||
}
|
||||
|
||||
+static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up)
|
||||
+static inline void serial8250_set_IER(struct uart_8250_port *up,
|
||||
+ unsigned char ier)
|
||||
+{
|
||||
+ if (up->ier & UART_IER_THRI)
|
||||
+ return false;
|
||||
+ up->ier |= UART_IER_THRI;
|
||||
+ set_ier(up, up->ier);
|
||||
+ return true;
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ serial_out(up, UART_IER, ier);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
|
||||
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int clearval = 0;
|
||||
+ unsigned int prior;
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (up->capabilities & UART_CAP_UUE)
|
||||
+ clearval = UART_IER_UUE;
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ prior = serial_port_in(port, UART_IER);
|
||||
+ serial_port_out(port, UART_IER, clearval);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
+
|
||||
+ return prior;
|
||||
+}
|
||||
+
|
||||
static inline bool serial8250_set_THRI(struct uart_8250_port *up)
|
||||
{
|
||||
if (!(up->ier & UART_IER_THRI))
|
||||
@@ -148,6 +161,15 @@ static inline bool serial8250_clear_THRI
|
||||
if (up->ier & UART_IER_THRI)
|
||||
return false;
|
||||
up->ier |= UART_IER_THRI;
|
||||
- serial_out(up, UART_IER, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
return true;
|
||||
}
|
||||
|
||||
+static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ if (!(up->ier & UART_IER_THRI))
|
||||
+ return false;
|
||||
+ up->ier &= ~UART_IER_THRI;
|
||||
+ set_ier(up, up->ier);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
struct uart_8250_port *serial8250_get_port(int line);
|
||||
@@ -144,7 +187,7 @@ static inline bool serial8250_clear_THRI
|
||||
if (!(up->ier & UART_IER_THRI))
|
||||
return false;
|
||||
up->ier &= ~UART_IER_THRI;
|
||||
- serial_out(up, UART_IER, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
return true;
|
||||
}
|
||||
|
||||
void serial8250_rpm_get(struct uart_8250_port *p);
|
||||
--- a/drivers/tty/serial/8250/8250_core.c
|
||||
+++ b/drivers/tty/serial/8250/8250_core.c
|
||||
@@ -265,7 +265,7 @@ static void serial8250_timeout(struct ti
|
||||
static void serial8250_backup_timeout(struct timer_list *t)
|
||||
{
|
||||
struct uart_8250_port *up = from_timer(up, t, timer);
|
||||
- unsigned int iir, ier = 0, lsr;
|
||||
+ unsigned int iir, lsr;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&up->port.lock, flags);
|
||||
@@ -274,10 +274,8 @@ static void serial8250_backup_timeout(st
|
||||
* Must disable interrupts or else we risk racing with the interrupt
|
||||
* based handler.
|
||||
@@ -87,7 +103,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
- serial_out(up, UART_IER, 0);
|
||||
- }
|
||||
+ if (up->port.irq)
|
||||
+ clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
iir = serial_in(up, UART_IIR);
|
||||
|
||||
@@ -96,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
if (up->port.irq)
|
||||
- serial_out(up, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
|
||||
spin_unlock_irqrestore(&up->port.lock, flags);
|
||||
|
||||
@@ -115,7 +131,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
static void univ8250_console_write(struct console *co, const char *s,
|
||||
unsigned int count)
|
||||
{
|
||||
@@ -663,6 +669,7 @@ static int univ8250_console_match(struct
|
||||
@@ -671,6 +677,7 @@ static int univ8250_console_match(struct
|
||||
|
||||
static struct console univ8250_console = {
|
||||
.name = "ttyS",
|
||||
@@ -123,149 +139,141 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
.write = univ8250_console_write,
|
||||
.device = uart_console_device,
|
||||
.setup = univ8250_console_setup,
|
||||
--- a/drivers/tty/serial/8250/8250_dma.c
|
||||
+++ b/drivers/tty/serial/8250/8250_dma.c
|
||||
@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para
|
||||
--- a/drivers/tty/serial/8250/8250_fsl.c
|
||||
+++ b/drivers/tty/serial/8250/8250_fsl.c
|
||||
@@ -60,9 +60,18 @@ int fsl8250_handle_irq(struct uart_port
|
||||
|
||||
ret = serial8250_tx_dma(p);
|
||||
if (ret)
|
||||
- serial8250_set_THRI(p);
|
||||
+ serial8250_set_THRI_sier(p);
|
||||
/* Stop processing interrupts on input overrun */
|
||||
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
|
||||
+ unsigned int ca_flags;
|
||||
unsigned long delay;
|
||||
+ bool is_console;
|
||||
|
||||
spin_unlock_irqrestore(&p->port.lock, flags);
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&ca_flags);
|
||||
up->ier = port->serial_in(port, UART_IER);
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(ca_flags);
|
||||
+
|
||||
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
|
||||
port->ops->stop_rx(port);
|
||||
} else {
|
||||
--- a/drivers/tty/serial/8250/8250_ingenic.c
|
||||
+++ b/drivers/tty/serial/8250/8250_ingenic.c
|
||||
@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic
|
||||
|
||||
static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
|
||||
{
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
int ier;
|
||||
|
||||
switch (offset) {
|
||||
@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(stru
|
||||
* If we have enabled modem status IRQs we should enable
|
||||
* modem mode.
|
||||
*/
|
||||
+ is_console = uart_console(p);
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
ier = p->serial_in(p, UART_IER);
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
|
||||
if (ier & UART_IER_MSI)
|
||||
value |= UART_MCR_MDCE | UART_MCR_FCM;
|
||||
--- a/drivers/tty/serial/8250/8250_mtk.c
|
||||
+++ b/drivers/tty/serial/8250/8250_mtk.c
|
||||
@@ -213,12 +213,37 @@ static void mtk8250_shutdown(struct uart
|
||||
|
||||
static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
|
||||
{
|
||||
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ ier = serial_in(up, UART_IER);
|
||||
+ serial_out(up, UART_IER, ier & (~mask));
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
}
|
||||
@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p
|
||||
dma_async_issue_pending(dma->txchan);
|
||||
if (dma->tx_err) {
|
||||
dma->tx_err = 0;
|
||||
- serial8250_clear_THRI(p);
|
||||
+ serial8250_clear_THRI_sier(p);
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
|
||||
static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
|
||||
{
|
||||
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+
|
||||
+ if (uart_console(port))
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ ier = serial_in(up, UART_IER);
|
||||
+ serial_out(up, UART_IER, ier | mask);
|
||||
+
|
||||
+ if (uart_console(port))
|
||||
+ console_atomic_unlock(flags);
|
||||
}
|
||||
|
||||
static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
|
||||
--- a/drivers/tty/serial/8250/8250_port.c
|
||||
+++ b/drivers/tty/serial/8250/8250_port.c
|
||||
@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct
|
||||
@@ -757,7 +757,7 @@ static void serial8250_set_sleep(struct
|
||||
serial_out(p, UART_EFR, UART_EFR_ECB);
|
||||
serial_out(p, UART_LCR, 0);
|
||||
}
|
||||
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
|
||||
+ set_ier(p, sleep ? UART_IERX_SLEEP : 0);
|
||||
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
|
||||
if (p->capabilities & UART_CAP_EFR) {
|
||||
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
|
||||
serial_out(p, UART_EFR, efr);
|
||||
@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua
|
||||
@@ -1429,7 +1429,7 @@ static void serial8250_stop_rx(struct ua
|
||||
|
||||
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
||||
up->port.read_status_mask &= ~UART_LSR_DR;
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua
|
||||
@@ -1459,7 +1459,7 @@ void serial8250_em485_stop_tx(struct uar
|
||||
serial8250_clear_and_reinit_fifos(p);
|
||||
|
||||
p->ier |= UART_IER_RLSI | UART_IER_RDI;
|
||||
- serial_port_out(&p->port, UART_IER, p->ier);
|
||||
+ set_ier(p, p->ier);
|
||||
+ serial8250_set_IER(p, p->ier);
|
||||
}
|
||||
}
|
||||
static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t)
|
||||
@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_
|
||||
|
||||
static inline void __do_stop_tx(struct uart_8250_port *p)
|
||||
{
|
||||
- if (serial8250_clear_THRI(p))
|
||||
+ if (serial8250_clear_THRI_sier(p))
|
||||
serial8250_rpm_put_tx(p);
|
||||
}
|
||||
|
||||
@@ -1509,7 +1509,7 @@ static inline void __start_tx(struct uar
|
||||
if (up->dma && !up->dma->tx_dma(up))
|
||||
return;
|
||||
|
||||
- if (serial8250_set_THRI(up)) {
|
||||
+ if (serial8250_set_THRI_sier(up)) {
|
||||
if (up->bugs & UART_BUG_TXEN) {
|
||||
unsigned char lsr;
|
||||
|
||||
@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct
|
||||
EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx);
|
||||
@@ -1687,7 +1687,7 @@ static void serial8250_disable_ms(struct
|
||||
mctrl_gpio_disable_ms(up->gpios);
|
||||
|
||||
up->ier &= ~UART_IER_MSI;
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
}
|
||||
|
||||
static void serial8250_enable_ms(struct uart_port *port)
|
||||
@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct
|
||||
@@ -1703,7 +1703,7 @@ static void serial8250_enable_ms(struct
|
||||
up->ier |= UART_IER_MSI;
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -1991,6 +1991,52 @@ static void wait_for_xmitr(struct uart_8
|
||||
}
|
||||
}
|
||||
|
||||
+static atomic_t ier_counter = ATOMIC_INIT(0);
|
||||
+static atomic_t ier_value = ATOMIC_INIT(0);
|
||||
+
|
||||
+void set_ier(struct uart_8250_port *up, unsigned char ier)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ if (atomic_read(&ier_counter) > 0)
|
||||
+ atomic_set(&ier_value, ier);
|
||||
+ else
|
||||
+ serial_port_out(port, UART_IER, ier);
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
+void clear_ier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int ier_cleared = 0;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ atomic_inc(&ier_counter);
|
||||
+ ier = serial_port_in(port, UART_IER);
|
||||
+ if (up->capabilities & UART_CAP_UUE)
|
||||
+ ier_cleared = UART_IER_UUE;
|
||||
+ if (ier != ier_cleared) {
|
||||
+ serial_port_out(port, UART_IER, ier_cleared);
|
||||
+ atomic_set(&ier_value, ier);
|
||||
+ }
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
+void restore_ier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ if (atomic_fetch_dec(&ier_counter) == 1)
|
||||
+ serial_port_out(port, UART_IER, atomic_read(&ier_value));
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_CONSOLE_POLL
|
||||
/*
|
||||
* Console polling routines for writing and reading from the uart while
|
||||
@@ -2022,18 +2068,10 @@ static int serial8250_get_poll_char(stru
|
||||
static void serial8250_put_poll_char(struct uart_port *port,
|
||||
unsigned char c)
|
||||
{
|
||||
- unsigned int ier;
|
||||
@@ -2118,14 +2118,7 @@ static void serial8250_put_poll_char(str
|
||||
struct uart_8250_port *up = up_to_u8250p(port);
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
@@ -277,38 +285,38 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
- serial_port_out(port, UART_IER, UART_IER_UUE);
|
||||
- else
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
/*
|
||||
@@ -2046,7 +2084,7 @@ static void serial8250_put_poll_char(str
|
||||
@@ -2138,7 +2131,7 @@ static void serial8250_put_poll_char(str
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- serial_port_out(port, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -2358,7 +2396,7 @@ void serial8250_do_shutdown(struct uart_
|
||||
@@ -2441,7 +2434,7 @@ void serial8250_do_shutdown(struct uart_
|
||||
*/
|
||||
spin_lock_irqsave(&port->lock, flags);
|
||||
up->ier = 0;
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ set_ier(up, 0);
|
||||
+ serial8250_set_IER(up, 0);
|
||||
spin_unlock_irqrestore(&port->lock, flags);
|
||||
|
||||
synchronize_irq(port->irq);
|
||||
@@ -2643,7 +2681,7 @@ serial8250_do_set_termios(struct uart_po
|
||||
@@ -2771,7 +2764,7 @@ serial8250_do_set_termios(struct uart_po
|
||||
if (up->capabilities & UART_CAP_RTOIE)
|
||||
up->ier |= UART_IER_RTOIE;
|
||||
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
|
||||
if (up->capabilities & UART_CAP_EFR) {
|
||||
unsigned char efr = 0;
|
||||
@@ -3107,7 +3145,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default
|
||||
@@ -3237,7 +3230,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default
|
||||
|
||||
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
||||
|
||||
@@ -317,7 +325,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
struct uart_8250_port *up = up_to_u8250p(port);
|
||||
|
||||
@@ -3115,6 +3153,18 @@ static void serial8250_console_putchar(s
|
||||
@@ -3245,6 +3238,18 @@ static void serial8250_console_putchar(s
|
||||
serial_port_out(port, UART_TX, ch);
|
||||
}
|
||||
|
||||
@@ -336,7 +344,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Restore serial console when h/w power-off detected
|
||||
*/
|
||||
@@ -3136,6 +3186,42 @@ static void serial8250_console_restore(s
|
||||
@@ -3266,6 +3271,32 @@ static void serial8250_console_restore(s
|
||||
serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
|
||||
}
|
||||
|
||||
@@ -345,20 +353,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ bool locked;
|
||||
+ unsigned int ier;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ /*
|
||||
+ * If possible, keep any other CPUs from working with the
|
||||
+ * UART until the atomic message is completed. This helps
|
||||
+ * to keep the output more orderly.
|
||||
+ */
|
||||
+ locked = spin_trylock(&port->lock);
|
||||
+
|
||||
+ touch_nmi_watchdog();
|
||||
+
|
||||
+ clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
+
|
||||
+ if (atomic_fetch_inc(&up->console_printing)) {
|
||||
+ uart_console_write(port, "\n", 1,
|
||||
@@ -368,10 +369,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ atomic_dec(&up->console_printing);
|
||||
+
|
||||
+ wait_for_xmitr(up, BOTH_EMPTY);
|
||||
+ restore_ier(up);
|
||||
+
|
||||
+ if (locked)
|
||||
+ spin_unlock(&port->lock);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
+
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
@@ -379,18 +377,14 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Print a string to the serial port trying not to disturb
|
||||
* any possible real use of the port...
|
||||
@@ -3147,27 +3233,13 @@ void serial8250_console_write(struct uar
|
||||
{
|
||||
@@ -3282,24 +3313,12 @@ void serial8250_console_write(struct uar
|
||||
struct uart_port *port = &up->port;
|
||||
unsigned long flags;
|
||||
- unsigned int ier;
|
||||
unsigned int ier;
|
||||
- int locked = 1;
|
||||
|
||||
touch_nmi_watchdog();
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
+ spin_lock_irqsave(&port->lock, flags);
|
||||
|
||||
- if (oops_in_progress)
|
||||
- locked = spin_trylock_irqsave(&port->lock, flags);
|
||||
- else
|
||||
@@ -400,17 +394,18 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
- * First save the IER then disable the interrupts
|
||||
- */
|
||||
- ier = serial_port_in(port, UART_IER);
|
||||
-
|
||||
+ spin_lock_irqsave(&port->lock, flags);
|
||||
|
||||
- if (up->capabilities & UART_CAP_UUE)
|
||||
- serial_port_out(port, UART_IER, UART_IER_UUE);
|
||||
- else
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
/* check scratch reg to see if port powered off during system sleep */
|
||||
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
|
||||
@@ -3175,14 +3247,16 @@ void serial8250_console_write(struct uar
|
||||
up->canary = 0;
|
||||
@@ -3313,7 +3332,9 @@ void serial8250_console_write(struct uar
|
||||
mdelay(port->rs485.delay_rts_before_send);
|
||||
}
|
||||
|
||||
+ atomic_inc(&up->console_printing);
|
||||
@@ -419,25 +414,27 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* Finally, wait for transmitter to become empty
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
@@ -3326,8 +3347,7 @@ void serial8250_console_write(struct uar
|
||||
if (em485->tx_stopped)
|
||||
up->rs485_stop_tx(up);
|
||||
}
|
||||
-
|
||||
- serial_port_out(port, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
|
||||
/*
|
||||
* The receive handling will happen properly because the
|
||||
@@ -3194,8 +3268,7 @@ void serial8250_console_write(struct uar
|
||||
@@ -3339,8 +3359,7 @@ void serial8250_console_write(struct uar
|
||||
if (up->msr_saved_flags)
|
||||
serial8250_modem_status(up);
|
||||
|
||||
- if (locked)
|
||||
- spin_unlock_irqrestore(&port->lock, flags);
|
||||
+ spin_unlock_irqrestore(&port->lock, flags);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -3216,6 +3289,7 @@ static unsigned int probe_baud(struct ua
|
||||
static unsigned int probe_baud(struct uart_port *port)
|
||||
@@ -3360,6 +3379,7 @@ static unsigned int probe_baud(struct ua
|
||||
|
||||
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
|
||||
{
|
||||
@@ -445,7 +442,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
int baud = 9600;
|
||||
int bits = 8;
|
||||
int parity = 'n';
|
||||
@@ -3224,6 +3298,8 @@ int serial8250_console_setup(struct uart
|
||||
@@ -3369,6 +3389,8 @@ int serial8250_console_setup(struct uart
|
||||
if (!port->iobase && !port->membase)
|
||||
return -ENODEV;
|
||||
|
||||
@@ -464,7 +461,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#include <linux/serial_core.h>
|
||||
#include <linux/serial_reg.h>
|
||||
#include <linux/platform_device.h>
|
||||
@@ -123,6 +124,8 @@ struct uart_8250_port {
|
||||
@@ -125,6 +126,8 @@ struct uart_8250_port {
|
||||
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
|
||||
unsigned char msr_saved_flags;
|
||||
|
||||
@@ -473,12 +470,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
struct uart_8250_dma *dma;
|
||||
const struct uart_8250_ops *ops;
|
||||
|
||||
@@ -174,6 +177,8 @@ void serial8250_init_port(struct uart_82
|
||||
@@ -180,6 +183,8 @@ void serial8250_init_port(struct uart_82
|
||||
void serial8250_set_defaults(struct uart_8250_port *up);
|
||||
void serial8250_console_write(struct uart_8250_port *up, const char *s,
|
||||
unsigned int count);
|
||||
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
|
||||
+ unsigned int count);
|
||||
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
|
||||
int serial8250_console_exit(struct uart_port *port);
|
||||
|
||||
extern void serial8250_set_isa_configurator(void (*v)
|
||||
@@ -0,0 +1,82 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:03 +0106
|
||||
Subject: [PATCH 21/28] printk: relocate printk_delay() and vprintk_default()
|
||||
|
||||
Move printk_delay() and vprintk_default() "as is" further up so that
|
||||
they can be used by new functions in an upcoming commit.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 40 ++++++++++++++++++++--------------------
|
||||
1 file changed, 20 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1725,6 +1725,20 @@ SYSCALL_DEFINE3(syslog, int, type, char
|
||||
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
|
||||
}
|
||||
|
||||
+int printk_delay_msec __read_mostly;
|
||||
+
|
||||
+static inline void printk_delay(void)
|
||||
+{
|
||||
+ if (unlikely(printk_delay_msec)) {
|
||||
+ int m = printk_delay_msec;
|
||||
+
|
||||
+ while (m--) {
|
||||
+ mdelay(1);
|
||||
+ touch_nmi_watchdog();
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Special console_lock variants that help to reduce the risk of soft-lockups.
|
||||
* They allow to pass console_lock to another printk() call using a busy wait.
|
||||
@@ -1968,20 +1982,6 @@ static void printk_exit_irqrestore(unsig
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
-int printk_delay_msec __read_mostly;
|
||||
-
|
||||
-static inline void printk_delay(void)
|
||||
-{
|
||||
- if (unlikely(printk_delay_msec)) {
|
||||
- int m = printk_delay_msec;
|
||||
-
|
||||
- while (m--) {
|
||||
- mdelay(1);
|
||||
- touch_nmi_watchdog();
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static inline u32 printk_caller_id(void)
|
||||
{
|
||||
return in_task() ? task_pid_nr(current) :
|
||||
@@ -2214,18 +2214,18 @@ asmlinkage int vprintk_emit(int facility
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
|
||||
-asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
-{
|
||||
- return vprintk_func(fmt, args);
|
||||
-}
|
||||
-EXPORT_SYMBOL(vprintk);
|
||||
-
|
||||
int vprintk_default(const char *fmt, va_list args)
|
||||
{
|
||||
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vprintk_default);
|
||||
|
||||
+asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
+{
|
||||
+ return vprintk_func(fmt, args);
|
||||
+}
|
||||
+EXPORT_SYMBOL(vprintk);
|
||||
+
|
||||
/**
|
||||
* printk - print a kernel message
|
||||
* @fmt: format string
|
||||
@@ -0,0 +1,37 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:04 +0106
|
||||
Subject: [PATCH 22/28] printk: combine boot_delay_msec() into printk_delay()
|
||||
|
||||
boot_delay_msec() is always called immediately before printk_delay()
|
||||
so just combine the two.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1727,8 +1727,10 @@ SYSCALL_DEFINE3(syslog, int, type, char
|
||||
|
||||
int printk_delay_msec __read_mostly;
|
||||
|
||||
-static inline void printk_delay(void)
|
||||
+static inline void printk_delay(int level)
|
||||
{
|
||||
+ boot_delay_msec(level);
|
||||
+
|
||||
if (unlikely(printk_delay_msec)) {
|
||||
int m = printk_delay_msec;
|
||||
|
||||
@@ -2186,8 +2188,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
in_sched = true;
|
||||
}
|
||||
|
||||
- boot_delay_msec(level);
|
||||
- printk_delay();
|
||||
+ printk_delay(level);
|
||||
|
||||
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:05 +0106
|
||||
Subject: [PATCH 23/28] printk: change @console_seq to atomic64_t
|
||||
|
||||
In preparation for atomic printing, change @console_seq to atomic
|
||||
so that it can be accessed without requiring @console_sem.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 34 +++++++++++++++++++---------------
|
||||
1 file changed, 19 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -366,12 +366,13 @@ static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
-/* All 3 protected by @console_sem. */
|
||||
-/* the next printk record to write to the console */
|
||||
-static u64 console_seq;
|
||||
+/* Both protected by @console_sem. */
|
||||
static u64 exclusive_console_stop_seq;
|
||||
static unsigned long console_dropped;
|
||||
|
||||
+/* the next printk record to write to the console */
|
||||
+static atomic64_t console_seq = ATOMIC64_INIT(0);
|
||||
+
|
||||
struct latched_seq {
|
||||
seqcount_latch_t latch;
|
||||
u64 val[2];
|
||||
@@ -2270,7 +2271,7 @@ EXPORT_SYMBOL(printk);
|
||||
#define prb_first_valid_seq(rb) 0
|
||||
|
||||
static u64 syslog_seq;
|
||||
-static u64 console_seq;
|
||||
+static atomic64_t console_seq = ATOMIC64_INIT(0);
|
||||
static u64 exclusive_console_stop_seq;
|
||||
static unsigned long console_dropped;
|
||||
|
||||
@@ -2585,6 +2586,7 @@ void console_unlock(void)
|
||||
bool do_cond_resched, retry;
|
||||
struct printk_info info;
|
||||
struct printk_record r;
|
||||
+ u64 seq;
|
||||
|
||||
if (console_suspended) {
|
||||
up_console_sem();
|
||||
@@ -2627,12 +2629,14 @@ void console_unlock(void)
|
||||
size_t len;
|
||||
|
||||
skip:
|
||||
- if (!prb_read_valid(prb, console_seq, &r))
|
||||
+ seq = atomic64_read(&console_seq);
|
||||
+ if (!prb_read_valid(prb, seq, &r))
|
||||
break;
|
||||
|
||||
- if (console_seq != r.info->seq) {
|
||||
- console_dropped += r.info->seq - console_seq;
|
||||
- console_seq = r.info->seq;
|
||||
+ if (seq != r.info->seq) {
|
||||
+ console_dropped += r.info->seq - seq;
|
||||
+ atomic64_set(&console_seq, r.info->seq);
|
||||
+ seq = r.info->seq;
|
||||
}
|
||||
|
||||
if (suppress_message_printing(r.info->level)) {
|
||||
@@ -2641,13 +2645,13 @@ void console_unlock(void)
|
||||
* directly to the console when we received it, and
|
||||
* record that has level above the console loglevel.
|
||||
*/
|
||||
- console_seq++;
|
||||
+ atomic64_set(&console_seq, seq + 1);
|
||||
goto skip;
|
||||
}
|
||||
|
||||
/* Output to all consoles once old messages replayed. */
|
||||
if (unlikely(exclusive_console &&
|
||||
- console_seq >= exclusive_console_stop_seq)) {
|
||||
+ seq >= exclusive_console_stop_seq)) {
|
||||
exclusive_console = NULL;
|
||||
}
|
||||
|
||||
@@ -2668,7 +2672,7 @@ void console_unlock(void)
|
||||
len = record_print_text(&r,
|
||||
console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
printk_time);
|
||||
- console_seq++;
|
||||
+ atomic64_set(&console_seq, seq + 1);
|
||||
|
||||
/*
|
||||
* While actively printing out messages, if another printk()
|
||||
@@ -2699,7 +2703,7 @@ void console_unlock(void)
|
||||
* there's a new owner and the console_unlock() from them will do the
|
||||
* flush, no worries.
|
||||
*/
|
||||
- retry = prb_read_valid(prb, console_seq, NULL);
|
||||
+ retry = prb_read_valid(prb, atomic64_read(&console_seq), NULL);
|
||||
if (retry && console_trylock())
|
||||
goto again;
|
||||
}
|
||||
@@ -2762,7 +2766,7 @@ void console_flush_on_panic(enum con_flu
|
||||
console_may_schedule = 0;
|
||||
|
||||
if (mode == CONSOLE_REPLAY_ALL)
|
||||
- console_seq = prb_first_valid_seq(prb);
|
||||
+ atomic64_set(&console_seq, prb_first_valid_seq(prb));
|
||||
console_unlock();
|
||||
}
|
||||
|
||||
@@ -2999,11 +3003,11 @@ void register_console(struct console *ne
|
||||
* ignores console_lock.
|
||||
*/
|
||||
exclusive_console = newcon;
|
||||
- exclusive_console_stop_seq = console_seq;
|
||||
+ exclusive_console_stop_seq = atomic64_read(&console_seq);
|
||||
|
||||
/* Get a consistent copy of @syslog_seq. */
|
||||
spin_lock_irqsave(&syslog_lock, flags);
|
||||
- console_seq = syslog_seq;
|
||||
+ atomic64_set(&console_seq, syslog_seq);
|
||||
spin_unlock_irqrestore(&syslog_lock, flags);
|
||||
}
|
||||
console_unlock();
|
||||
@@ -0,0 +1,298 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:06 +0106
|
||||
Subject: [PATCH 24/28] printk: introduce kernel sync mode
|
||||
|
||||
When the kernel performs an OOPS, enter into "sync mode":
|
||||
|
||||
- only atomic consoles (write_atomic() callback) will print
|
||||
- printing occurs within vprintk_store() instead of console_unlock()
|
||||
|
||||
CONSOLE_LOG_MAX is moved to printk.h to support the per-console
|
||||
buffer used in sync mode.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/console.h | 4 +
|
||||
include/linux/printk.h | 6 ++
|
||||
kernel/printk/printk.c | 133 +++++++++++++++++++++++++++++++++++++++++++++---
|
||||
3 files changed, 137 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/types.h>
|
||||
+#include <linux/printk.h>
|
||||
|
||||
struct vc_data;
|
||||
struct console_font_op;
|
||||
@@ -150,6 +151,9 @@ struct console {
|
||||
short flags;
|
||||
short index;
|
||||
int cflag;
|
||||
+#ifdef CONFIG_PRINTK
|
||||
+ char sync_buf[CONSOLE_LOG_MAX];
|
||||
+#endif
|
||||
void *data;
|
||||
struct console *next;
|
||||
};
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -46,6 +46,12 @@ static inline const char *printk_skip_he
|
||||
|
||||
#define CONSOLE_EXT_LOG_MAX 8192
|
||||
|
||||
+/*
|
||||
+ * The maximum size of a record formatted for console printing
|
||||
+ * (i.e. with the prefix prepended to every line).
|
||||
+ */
|
||||
+#define CONSOLE_LOG_MAX 4096
|
||||
+
|
||||
/* printk's without a loglevel use this.. */
|
||||
#define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/uio.h>
|
||||
+#include <linux/clocksource.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
@@ -359,6 +360,9 @@ enum log_flags {
|
||||
static DEFINE_SPINLOCK(syslog_lock);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
+/* Set to enable sync mode. Once set, it is never cleared. */
|
||||
+static bool sync_mode;
|
||||
+
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* All 3 protected by @syslog_lock. */
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
@@ -398,9 +402,6 @@ static struct latched_seq clear_seq = {
|
||||
/* the maximum size allowed to be reserved for a record */
|
||||
#define LOG_LINE_MAX (1024 - PREFIX_MAX)
|
||||
|
||||
-/* the maximum size of a formatted record (i.e. with prefix added per line) */
|
||||
-#define CONSOLE_LOG_MAX 4096
|
||||
-
|
||||
#define LOG_LEVEL(v) ((v) & 0x07)
|
||||
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
|
||||
|
||||
@@ -1742,6 +1743,91 @@ static inline void printk_delay(int leve
|
||||
}
|
||||
}
|
||||
|
||||
+static bool kernel_sync_mode(void)
|
||||
+{
|
||||
+ return (oops_in_progress || sync_mode);
|
||||
+}
|
||||
+
|
||||
+static bool console_can_sync(struct console *con)
|
||||
+{
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ return false;
|
||||
+ if (con->write_atomic && kernel_sync_mode())
|
||||
+ return true;
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool call_sync_console_driver(struct console *con, const char *text, size_t text_len)
|
||||
+{
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ return false;
|
||||
+ if (con->write_atomic && kernel_sync_mode())
|
||||
+ con->write_atomic(con, text, text_len);
|
||||
+ else
|
||||
+ return false;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool have_atomic_console(void)
|
||||
+{
|
||||
+ struct console *con;
|
||||
+
|
||||
+ for_each_console(con) {
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ continue;
|
||||
+ if (con->write_atomic)
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool print_sync(struct console *con, u64 *seq)
|
||||
+{
|
||||
+ struct printk_info info;
|
||||
+ struct printk_record r;
|
||||
+ size_t text_len;
|
||||
+
|
||||
+ prb_rec_init_rd(&r, &info, &con->sync_buf[0], sizeof(con->sync_buf));
|
||||
+
|
||||
+ if (!prb_read_valid(prb, *seq, &r))
|
||||
+ return false;
|
||||
+
|
||||
+ text_len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
|
||||
+
|
||||
+ if (!call_sync_console_driver(con, &con->sync_buf[0], text_len))
|
||||
+ return false;
|
||||
+
|
||||
+ *seq = r.info->seq;
|
||||
+
|
||||
+ touch_softlockup_watchdog_sync();
|
||||
+ clocksource_touch_watchdog();
|
||||
+ rcu_cpu_stall_reset();
|
||||
+ touch_nmi_watchdog();
|
||||
+
|
||||
+ if (text_len)
|
||||
+ printk_delay(r.info->level);
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static void print_sync_until(struct console *con, u64 seq)
|
||||
+{
|
||||
+ unsigned int flags;
|
||||
+ u64 printk_seq;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ for (;;) {
|
||||
+ printk_seq = atomic64_read(&console_seq);
|
||||
+ if (printk_seq >= seq)
|
||||
+ break;
|
||||
+ if (!print_sync(con, &printk_seq))
|
||||
+ break;
|
||||
+ atomic64_set(&console_seq, printk_seq + 1);
|
||||
+ }
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Special console_lock variants that help to reduce the risk of soft-lockups.
|
||||
* They allow to pass console_lock to another printk() call using a busy wait.
|
||||
@@ -1916,6 +2002,8 @@ static void call_console_drivers(const c
|
||||
if (!cpu_online(smp_processor_id()) &&
|
||||
!(con->flags & CON_ANYTIME))
|
||||
continue;
|
||||
+ if (kernel_sync_mode())
|
||||
+ continue;
|
||||
if (con->flags & CON_EXTENDED)
|
||||
con->write(con, ext_text, ext_len);
|
||||
else {
|
||||
@@ -2070,6 +2158,7 @@ int vprintk_store(int facility, int leve
|
||||
const u32 caller_id = printk_caller_id();
|
||||
struct prb_reserved_entry e;
|
||||
enum log_flags lflags = 0;
|
||||
+ bool final_commit = false;
|
||||
struct printk_record r;
|
||||
unsigned long irqflags;
|
||||
u16 trunc_msg_len = 0;
|
||||
@@ -2079,6 +2168,7 @@ int vprintk_store(int facility, int leve
|
||||
u16 text_len;
|
||||
int ret = 0;
|
||||
u64 ts_nsec;
|
||||
+ u64 seq;
|
||||
|
||||
/*
|
||||
* Since the duration of printk() can vary depending on the message
|
||||
@@ -2117,6 +2207,7 @@ int vprintk_store(int facility, int leve
|
||||
if (lflags & LOG_CONT) {
|
||||
prb_rec_init_wr(&r, reserve_size);
|
||||
if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) {
|
||||
+ seq = r.info->seq;
|
||||
text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
|
||||
facility, &lflags, fmt, args);
|
||||
r.info->text_len += text_len;
|
||||
@@ -2124,6 +2215,7 @@ int vprintk_store(int facility, int leve
|
||||
if (lflags & LOG_NEWLINE) {
|
||||
r.info->flags |= LOG_NEWLINE;
|
||||
prb_final_commit(&e);
|
||||
+ final_commit = true;
|
||||
} else {
|
||||
prb_commit(&e);
|
||||
}
|
||||
@@ -2148,6 +2240,8 @@ int vprintk_store(int facility, int leve
|
||||
goto out;
|
||||
}
|
||||
|
||||
+ seq = r.info->seq;
|
||||
+
|
||||
/* fill message */
|
||||
text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &lflags, fmt, args);
|
||||
if (trunc_msg_len)
|
||||
@@ -2162,13 +2256,25 @@ int vprintk_store(int facility, int leve
|
||||
memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));
|
||||
|
||||
/* A message without a trailing newline can be continued. */
|
||||
- if (!(lflags & LOG_NEWLINE))
|
||||
+ if (!(lflags & LOG_NEWLINE)) {
|
||||
prb_commit(&e);
|
||||
- else
|
||||
+ } else {
|
||||
prb_final_commit(&e);
|
||||
+ final_commit = true;
|
||||
+ }
|
||||
|
||||
ret = text_len + trunc_msg_len;
|
||||
out:
|
||||
+ /* only the kernel may perform synchronous printing */
|
||||
+ if (facility == 0 && final_commit) {
|
||||
+ struct console *con;
|
||||
+
|
||||
+ for_each_console(con) {
|
||||
+ if (console_can_sync(con))
|
||||
+ print_sync_until(con, seq + 1);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
printk_exit_irqrestore(irqflags);
|
||||
return ret;
|
||||
}
|
||||
@@ -2264,12 +2370,13 @@ EXPORT_SYMBOL(printk);
|
||||
|
||||
#else /* CONFIG_PRINTK */
|
||||
|
||||
-#define CONSOLE_LOG_MAX 0
|
||||
#define printk_time false
|
||||
|
||||
#define prb_read_valid(rb, seq, r) false
|
||||
#define prb_first_valid_seq(rb) 0
|
||||
|
||||
+#define kernel_sync_mode() false
|
||||
+
|
||||
static u64 syslog_seq;
|
||||
static atomic64_t console_seq = ATOMIC64_INIT(0);
|
||||
static u64 exclusive_console_stop_seq;
|
||||
@@ -2562,6 +2669,8 @@ static int have_callable_console(void)
|
||||
*/
|
||||
static inline int can_use_console(void)
|
||||
{
|
||||
+ if (kernel_sync_mode())
|
||||
+ return false;
|
||||
return cpu_online(raw_smp_processor_id()) || have_callable_console();
|
||||
}
|
||||
|
||||
@@ -3374,6 +3483,18 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
struct kmsg_dumper_iter iter;
|
||||
struct kmsg_dumper *dumper;
|
||||
|
||||
+ if (!oops_in_progress) {
|
||||
+ /*
|
||||
+ * If atomic consoles are available, activate kernel sync mode
|
||||
+ * to make sure any final messages are visible. The trailing
|
||||
+ * printk message is important to flush any pending messages.
|
||||
+ */
|
||||
+ if (have_atomic_console()) {
|
||||
+ sync_mode = true;
|
||||
+ pr_info("enabled sync mode\n");
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(dumper, &dump_list, list) {
|
||||
enum kmsg_dump_reason max_reason = dumper->max_reason;
|
||||
@@ -0,0 +1,838 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:07 +0106
|
||||
Subject: [PATCH 25/28] printk: move console printing to kthreads
|
||||
|
||||
Create a kthread for each console to perform console printing. Now
|
||||
all console printing is fully asynchronous except for the boot
|
||||
console and when the kernel enters sync mode (and there are atomic
|
||||
consoles available).
|
||||
|
||||
The console_lock() and console_unlock() functions now only do what
|
||||
their names say... locking and unlocking of the console.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/console.h | 2
|
||||
kernel/printk/printk.c | 625 ++++++++++++++----------------------------------
|
||||
2 files changed, 186 insertions(+), 441 deletions(-)
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -154,6 +154,8 @@ struct console {
|
||||
#ifdef CONFIG_PRINTK
|
||||
char sync_buf[CONSOLE_LOG_MAX];
|
||||
#endif
|
||||
+ atomic64_t printk_seq;
|
||||
+ struct task_struct *thread;
|
||||
void *data;
|
||||
struct console *next;
|
||||
};
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/uio.h>
|
||||
+#include <linux/kthread.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/debug.h>
|
||||
@@ -268,11 +269,6 @@ static void __up_console_sem(unsigned lo
|
||||
static int console_locked, console_suspended;
|
||||
|
||||
/*
|
||||
- * If exclusive_console is non-NULL then only this console is to be printed to.
|
||||
- */
|
||||
-static struct console *exclusive_console;
|
||||
-
|
||||
-/*
|
||||
* Array of consoles built from command line options (console=)
|
||||
*/
|
||||
|
||||
@@ -356,10 +352,10 @@ enum log_flags {
|
||||
LOG_CONT = 8, /* text is a fragment of a continuation line */
|
||||
};
|
||||
|
||||
+#ifdef CONFIG_PRINTK
|
||||
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
|
||||
static DEFINE_SPINLOCK(syslog_lock);
|
||||
|
||||
-#ifdef CONFIG_PRINTK
|
||||
/* Set to enable sync mode. Once set, it is never cleared. */
|
||||
static bool sync_mode;
|
||||
|
||||
@@ -370,13 +366,6 @@ static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
-/* Both protected by @console_sem. */
|
||||
-static u64 exclusive_console_stop_seq;
|
||||
-static unsigned long console_dropped;
|
||||
-
|
||||
-/* the next printk record to write to the console */
|
||||
-static atomic64_t console_seq = ATOMIC64_INIT(0);
|
||||
-
|
||||
struct latched_seq {
|
||||
seqcount_latch_t latch;
|
||||
u64 val[2];
|
||||
@@ -1754,6 +1743,8 @@ static bool console_can_sync(struct cons
|
||||
return false;
|
||||
if (con->write_atomic && kernel_sync_mode())
|
||||
return true;
|
||||
+ if (con->write && (con->flags & CON_BOOT) && !con->thread)
|
||||
+ return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1763,6 +1754,8 @@ static bool call_sync_console_driver(str
|
||||
return false;
|
||||
if (con->write_atomic && kernel_sync_mode())
|
||||
con->write_atomic(con, text, text_len);
|
||||
+ else if (con->write && (con->flags & CON_BOOT) && !con->thread)
|
||||
+ con->write(con, text, text_len);
|
||||
else
|
||||
return false;
|
||||
|
||||
@@ -1818,202 +1811,16 @@ static void print_sync_until(struct cons
|
||||
|
||||
console_atomic_lock(&flags);
|
||||
for (;;) {
|
||||
- printk_seq = atomic64_read(&console_seq);
|
||||
+ printk_seq = atomic64_read(&con->printk_seq);
|
||||
if (printk_seq >= seq)
|
||||
break;
|
||||
if (!print_sync(con, &printk_seq))
|
||||
break;
|
||||
- atomic64_set(&console_seq, printk_seq + 1);
|
||||
+ atomic64_set(&con->printk_seq, printk_seq + 1);
|
||||
}
|
||||
console_atomic_unlock(flags);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Special console_lock variants that help to reduce the risk of soft-lockups.
|
||||
- * They allow to pass console_lock to another printk() call using a busy wait.
|
||||
- */
|
||||
-
|
||||
-#ifdef CONFIG_LOCKDEP
|
||||
-static struct lockdep_map console_owner_dep_map = {
|
||||
- .name = "console_owner"
|
||||
-};
|
||||
-#endif
|
||||
-
|
||||
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
|
||||
-static struct task_struct *console_owner;
|
||||
-static bool console_waiter;
|
||||
-
|
||||
-/**
|
||||
- * console_lock_spinning_enable - mark beginning of code where another
|
||||
- * thread might safely busy wait
|
||||
- *
|
||||
- * This basically converts console_lock into a spinlock. This marks
|
||||
- * the section where the console_lock owner can not sleep, because
|
||||
- * there may be a waiter spinning (like a spinlock). Also it must be
|
||||
- * ready to hand over the lock at the end of the section.
|
||||
- */
|
||||
-static void console_lock_spinning_enable(void)
|
||||
-{
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- console_owner = current;
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- /* The waiter may spin on us after setting console_owner */
|
||||
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * console_lock_spinning_disable_and_check - mark end of code where another
|
||||
- * thread was able to busy wait and check if there is a waiter
|
||||
- *
|
||||
- * This is called at the end of the section where spinning is allowed.
|
||||
- * It has two functions. First, it is a signal that it is no longer
|
||||
- * safe to start busy waiting for the lock. Second, it checks if
|
||||
- * there is a busy waiter and passes the lock rights to her.
|
||||
- *
|
||||
- * Important: Callers lose the lock if there was a busy waiter.
|
||||
- * They must not touch items synchronized by console_lock
|
||||
- * in this case.
|
||||
- *
|
||||
- * Return: 1 if the lock rights were passed, 0 otherwise.
|
||||
- */
|
||||
-static int console_lock_spinning_disable_and_check(void)
|
||||
-{
|
||||
- int waiter;
|
||||
-
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- waiter = READ_ONCE(console_waiter);
|
||||
- console_owner = NULL;
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- if (!waiter) {
|
||||
- spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* The waiter is now free to continue */
|
||||
- WRITE_ONCE(console_waiter, false);
|
||||
-
|
||||
- spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
-
|
||||
- /*
|
||||
- * Hand off console_lock to waiter. The waiter will perform
|
||||
- * the up(). After this, the waiter is the console_lock owner.
|
||||
- */
|
||||
- mutex_release(&console_lock_dep_map, _THIS_IP_);
|
||||
- return 1;
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * console_trylock_spinning - try to get console_lock by busy waiting
|
||||
- *
|
||||
- * This allows to busy wait for the console_lock when the current
|
||||
- * owner is running in specially marked sections. It means that
|
||||
- * the current owner is running and cannot reschedule until it
|
||||
- * is ready to lose the lock.
|
||||
- *
|
||||
- * Return: 1 if we got the lock, 0 othrewise
|
||||
- */
|
||||
-static int console_trylock_spinning(void)
|
||||
-{
|
||||
- struct task_struct *owner = NULL;
|
||||
- bool waiter;
|
||||
- bool spin = false;
|
||||
- unsigned long flags;
|
||||
-
|
||||
- if (console_trylock())
|
||||
- return 1;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
-
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- owner = READ_ONCE(console_owner);
|
||||
- waiter = READ_ONCE(console_waiter);
|
||||
- if (!waiter && owner && owner != current) {
|
||||
- WRITE_ONCE(console_waiter, true);
|
||||
- spin = true;
|
||||
- }
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- /*
|
||||
- * If there is an active printk() writing to the
|
||||
- * consoles, instead of having it write our data too,
|
||||
- * see if we can offload that load from the active
|
||||
- * printer, and do some printing ourselves.
|
||||
- * Go into a spin only if there isn't already a waiter
|
||||
- * spinning, and there is an active printer, and
|
||||
- * that active printer isn't us (recursive printk?).
|
||||
- */
|
||||
- if (!spin) {
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* We spin waiting for the owner to release us */
|
||||
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
|
||||
- /* Owner will clear console_waiter on hand off */
|
||||
- while (READ_ONCE(console_waiter))
|
||||
- cpu_relax();
|
||||
- spin_release(&console_owner_dep_map, _THIS_IP_);
|
||||
-
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- /*
|
||||
- * The owner passed the console lock to us.
|
||||
- * Since we did not spin on console lock, annotate
|
||||
- * this as a trylock. Otherwise lockdep will
|
||||
- * complain.
|
||||
- */
|
||||
- mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
|
||||
-
|
||||
- return 1;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Call the console drivers, asking them to write out
|
||||
- * log_buf[start] to log_buf[end - 1].
|
||||
- * The console_lock must be held.
|
||||
- */
|
||||
-static void call_console_drivers(const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len)
|
||||
-{
|
||||
- static char dropped_text[64];
|
||||
- size_t dropped_len = 0;
|
||||
- struct console *con;
|
||||
-
|
||||
- trace_console_rcuidle(text, len);
|
||||
-
|
||||
- if (!console_drivers)
|
||||
- return;
|
||||
-
|
||||
- if (console_dropped) {
|
||||
- dropped_len = snprintf(dropped_text, sizeof(dropped_text),
|
||||
- "** %lu printk messages dropped **\n",
|
||||
- console_dropped);
|
||||
- console_dropped = 0;
|
||||
- }
|
||||
-
|
||||
- for_each_console(con) {
|
||||
- if (exclusive_console && con != exclusive_console)
|
||||
- continue;
|
||||
- if (!(con->flags & CON_ENABLED))
|
||||
- continue;
|
||||
- if (!con->write)
|
||||
- continue;
|
||||
- if (!cpu_online(smp_processor_id()) &&
|
||||
- !(con->flags & CON_ANYTIME))
|
||||
- continue;
|
||||
- if (kernel_sync_mode())
|
||||
- continue;
|
||||
- if (con->flags & CON_EXTENDED)
|
||||
- con->write(con, ext_text, ext_len);
|
||||
- else {
|
||||
- if (dropped_len)
|
||||
- con->write(con, dropped_text, dropped_len);
|
||||
- con->write(con, text, len);
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
#ifdef CONFIG_PRINTK_NMI
|
||||
#define NUM_RECURSION_CTX 2
|
||||
#else
|
||||
@@ -2284,39 +2091,16 @@ asmlinkage int vprintk_emit(int facility
|
||||
const char *fmt, va_list args)
|
||||
{
|
||||
int printed_len;
|
||||
- bool in_sched = false;
|
||||
|
||||
/* Suppress unimportant messages after panic happens */
|
||||
if (unlikely(suppress_printk))
|
||||
return 0;
|
||||
|
||||
- if (level == LOGLEVEL_SCHED) {
|
||||
+ if (level == LOGLEVEL_SCHED)
|
||||
level = LOGLEVEL_DEFAULT;
|
||||
- in_sched = true;
|
||||
- }
|
||||
-
|
||||
- printk_delay(level);
|
||||
|
||||
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
|
||||
|
||||
- /* If called from the scheduler, we can not call up(). */
|
||||
- if (!in_sched) {
|
||||
- /*
|
||||
- * Disable preemption to avoid being preempted while holding
|
||||
- * console_sem which would prevent anyone from printing to
|
||||
- * console
|
||||
- */
|
||||
- preempt_disable();
|
||||
- /*
|
||||
- * Try to acquire and then immediately release the console
|
||||
- * semaphore. The release will print out buffers and wake up
|
||||
- * /dev/kmsg and syslog() users.
|
||||
- */
|
||||
- if (console_trylock_spinning())
|
||||
- console_unlock();
|
||||
- preempt_enable();
|
||||
- }
|
||||
-
|
||||
wake_up_klogd();
|
||||
return printed_len;
|
||||
}
|
||||
@@ -2368,38 +2152,158 @@ asmlinkage __visible int printk(const ch
|
||||
}
|
||||
EXPORT_SYMBOL(printk);
|
||||
|
||||
-#else /* CONFIG_PRINTK */
|
||||
+static int printk_kthread_func(void *data)
|
||||
+{
|
||||
+ struct console *con = data;
|
||||
+ unsigned long dropped = 0;
|
||||
+ char *dropped_text = NULL;
|
||||
+ struct printk_info info;
|
||||
+ struct printk_record r;
|
||||
+ char *ext_text = NULL;
|
||||
+ size_t dropped_len;
|
||||
+ int ret = -ENOMEM;
|
||||
+ char *text = NULL;
|
||||
+ char *write_text;
|
||||
+ u64 printk_seq;
|
||||
+ size_t len;
|
||||
+ int error;
|
||||
+ u64 seq;
|
||||
|
||||
-#define printk_time false
|
||||
+ if (con->flags & CON_EXTENDED) {
|
||||
+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
|
||||
+ if (!ext_text)
|
||||
+ goto out;
|
||||
+ }
|
||||
+ text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
|
||||
+ dropped_text = kmalloc(64, GFP_KERNEL);
|
||||
+ if (!text || !dropped_text)
|
||||
+ goto out;
|
||||
|
||||
-#define prb_read_valid(rb, seq, r) false
|
||||
-#define prb_first_valid_seq(rb) 0
|
||||
+ if (con->flags & CON_EXTENDED)
|
||||
+ write_text = ext_text;
|
||||
+ else
|
||||
+ write_text = text;
|
||||
|
||||
-#define kernel_sync_mode() false
|
||||
+ seq = atomic64_read(&con->printk_seq);
|
||||
|
||||
-static u64 syslog_seq;
|
||||
-static atomic64_t console_seq = ATOMIC64_INIT(0);
|
||||
-static u64 exclusive_console_stop_seq;
|
||||
-static unsigned long console_dropped;
|
||||
+ prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX);
|
||||
+
|
||||
+ for (;;) {
|
||||
+ error = wait_event_interruptible(log_wait,
|
||||
+ prb_read_valid(prb, seq, &r) || kthread_should_stop());
|
||||
+
|
||||
+ if (kthread_should_stop())
|
||||
+ break;
|
||||
+
|
||||
+ if (error)
|
||||
+ continue;
|
||||
+
|
||||
+ if (seq != r.info->seq) {
|
||||
+ dropped += r.info->seq - seq;
|
||||
+ seq = r.info->seq;
|
||||
+ }
|
||||
+
|
||||
+ seq++;
|
||||
+
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ continue;
|
||||
+
|
||||
+ if (suppress_message_printing(r.info->level))
|
||||
+ continue;
|
||||
+
|
||||
+ if (con->flags & CON_EXTENDED) {
|
||||
+ len = info_print_ext_header(ext_text,
|
||||
+ CONSOLE_EXT_LOG_MAX,
|
||||
+ r.info);
|
||||
+ len += msg_print_ext_body(ext_text + len,
|
||||
+ CONSOLE_EXT_LOG_MAX - len,
|
||||
+ &r.text_buf[0], r.info->text_len,
|
||||
+ &r.info->dev_info);
|
||||
+ } else {
|
||||
+ len = record_print_text(&r,
|
||||
+ console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
+ printk_time);
|
||||
+ }
|
||||
+
|
||||
+ printk_seq = atomic64_read(&con->printk_seq);
|
||||
+
|
||||
+ console_lock();
|
||||
+ console_may_schedule = 0;
|
||||
|
||||
-static size_t record_print_text(const struct printk_record *r,
|
||||
- bool syslog, bool time)
|
||||
+ if (kernel_sync_mode() && con->write_atomic) {
|
||||
+ console_unlock();
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (!(con->flags & CON_EXTENDED) && dropped) {
|
||||
+ dropped_len = snprintf(dropped_text, 64,
|
||||
+ "** %lu printk messages dropped **\n",
|
||||
+ dropped);
|
||||
+ dropped = 0;
|
||||
+
|
||||
+ con->write(con, dropped_text, dropped_len);
|
||||
+ printk_delay(r.info->level);
|
||||
+ }
|
||||
+
|
||||
+ con->write(con, write_text, len);
|
||||
+ if (len)
|
||||
+ printk_delay(r.info->level);
|
||||
+
|
||||
+ atomic64_cmpxchg_relaxed(&con->printk_seq, printk_seq, seq);
|
||||
+
|
||||
+ console_unlock();
|
||||
+ }
|
||||
+out:
|
||||
+ kfree(dropped_text);
|
||||
+ kfree(text);
|
||||
+ kfree(ext_text);
|
||||
+ pr_info("%sconsole [%s%d]: printing thread stopped\n",
|
||||
+ (con->flags & CON_BOOT) ? "boot" : "",
|
||||
+ con->name, con->index);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/* Must be called within console_lock(). */
|
||||
+static void start_printk_kthread(struct console *con)
|
||||
{
|
||||
- return 0;
|
||||
+ con->thread = kthread_run(printk_kthread_func, con,
|
||||
+ "pr/%s%d", con->name, con->index);
|
||||
+ if (IS_ERR(con->thread)) {
|
||||
+ pr_err("%sconsole [%s%d]: unable to start printing thread\n",
|
||||
+ (con->flags & CON_BOOT) ? "boot" : "",
|
||||
+ con->name, con->index);
|
||||
+ return;
|
||||
+ }
|
||||
+ pr_info("%sconsole [%s%d]: printing thread started\n",
|
||||
+ (con->flags & CON_BOOT) ? "boot" : "",
|
||||
+ con->name, con->index);
|
||||
}
|
||||
-static ssize_t info_print_ext_header(char *buf, size_t size,
|
||||
- struct printk_info *info)
|
||||
+
|
||||
+/* protected by console_lock */
|
||||
+static bool kthreads_started;
|
||||
+
|
||||
+/* Must be called within console_lock(). */
|
||||
+static void console_try_thread(struct console *con)
|
||||
{
|
||||
- return 0;
|
||||
+ if (kthreads_started) {
|
||||
+ start_printk_kthread(con);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * The printing threads have not been started yet. If this console
|
||||
+ * can print synchronously, print all unprinted messages.
|
||||
+ */
|
||||
+ if (console_can_sync(con))
|
||||
+ print_sync_until(con, prb_next_seq(prb));
|
||||
}
|
||||
-static ssize_t msg_print_ext_body(char *buf, size_t size,
|
||||
- char *text, size_t text_len,
|
||||
- struct dev_printk_info *dev_info) { return 0; }
|
||||
-static void console_lock_spinning_enable(void) { }
|
||||
-static int console_lock_spinning_disable_and_check(void) { return 0; }
|
||||
-static void call_console_drivers(const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len) {}
|
||||
-static bool suppress_message_printing(int level) { return false; }
|
||||
+
|
||||
+#else /* CONFIG_PRINTK */
|
||||
+
|
||||
+#define prb_first_valid_seq(rb) 0
|
||||
+#define prb_next_seq(rb) 0
|
||||
+
|
||||
+#define console_try_thread(con)
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
@@ -2644,36 +2548,6 @@ int is_console_locked(void)
|
||||
}
|
||||
EXPORT_SYMBOL(is_console_locked);
|
||||
|
||||
-/*
|
||||
- * Check if we have any console that is capable of printing while cpu is
|
||||
- * booting or shutting down. Requires console_sem.
|
||||
- */
|
||||
-static int have_callable_console(void)
|
||||
-{
|
||||
- struct console *con;
|
||||
-
|
||||
- for_each_console(con)
|
||||
- if ((con->flags & CON_ENABLED) &&
|
||||
- (con->flags & CON_ANYTIME))
|
||||
- return 1;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Can we actually use the console at this time on this cpu?
|
||||
- *
|
||||
- * Console drivers may assume that per-cpu resources have been allocated. So
|
||||
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
|
||||
- * call them until this CPU is officially up.
|
||||
- */
|
||||
-static inline int can_use_console(void)
|
||||
-{
|
||||
- if (kernel_sync_mode())
|
||||
- return false;
|
||||
- return cpu_online(raw_smp_processor_id()) || have_callable_console();
|
||||
-}
|
||||
-
|
||||
/**
|
||||
* console_unlock - unlock the console system
|
||||
*
|
||||
@@ -2690,131 +2564,14 @@ static inline int can_use_console(void)
|
||||
*/
|
||||
void console_unlock(void)
|
||||
{
|
||||
- static char ext_text[CONSOLE_EXT_LOG_MAX];
|
||||
- static char text[CONSOLE_LOG_MAX];
|
||||
- bool do_cond_resched, retry;
|
||||
- struct printk_info info;
|
||||
- struct printk_record r;
|
||||
- u64 seq;
|
||||
-
|
||||
if (console_suspended) {
|
||||
up_console_sem();
|
||||
return;
|
||||
}
|
||||
|
||||
- prb_rec_init_rd(&r, &info, text, sizeof(text));
|
||||
-
|
||||
- /*
|
||||
- * Console drivers are called with interrupts disabled, so
|
||||
- * @console_may_schedule should be cleared before; however, we may
|
||||
- * end up dumping a lot of lines, for example, if called from
|
||||
- * console registration path, and should invoke cond_resched()
|
||||
- * between lines if allowable. Not doing so can cause a very long
|
||||
- * scheduling stall on a slow console leading to RCU stall and
|
||||
- * softlockup warnings which exacerbate the issue with more
|
||||
- * messages practically incapacitating the system.
|
||||
- *
|
||||
- * console_trylock() is not able to detect the preemptive
|
||||
- * context reliably. Therefore the value must be stored before
|
||||
- * and cleared after the "again" goto label.
|
||||
- */
|
||||
- do_cond_resched = console_may_schedule;
|
||||
-again:
|
||||
- console_may_schedule = 0;
|
||||
-
|
||||
- /*
|
||||
- * We released the console_sem lock, so we need to recheck if
|
||||
- * cpu is online and (if not) is there at least one CON_ANYTIME
|
||||
- * console.
|
||||
- */
|
||||
- if (!can_use_console()) {
|
||||
- console_locked = 0;
|
||||
- up_console_sem();
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- for (;;) {
|
||||
- size_t ext_len = 0;
|
||||
- size_t len;
|
||||
-
|
||||
-skip:
|
||||
- seq = atomic64_read(&console_seq);
|
||||
- if (!prb_read_valid(prb, seq, &r))
|
||||
- break;
|
||||
-
|
||||
- if (seq != r.info->seq) {
|
||||
- console_dropped += r.info->seq - seq;
|
||||
- atomic64_set(&console_seq, r.info->seq);
|
||||
- seq = r.info->seq;
|
||||
- }
|
||||
-
|
||||
- if (suppress_message_printing(r.info->level)) {
|
||||
- /*
|
||||
- * Skip record we have buffered and already printed
|
||||
- * directly to the console when we received it, and
|
||||
- * record that has level above the console loglevel.
|
||||
- */
|
||||
- atomic64_set(&console_seq, seq + 1);
|
||||
- goto skip;
|
||||
- }
|
||||
-
|
||||
- /* Output to all consoles once old messages replayed. */
|
||||
- if (unlikely(exclusive_console &&
|
||||
- seq >= exclusive_console_stop_seq)) {
|
||||
- exclusive_console = NULL;
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Handle extended console text first because later
|
||||
- * record_print_text() will modify the record buffer in-place.
|
||||
- */
|
||||
- if (nr_ext_console_drivers) {
|
||||
- ext_len = info_print_ext_header(ext_text,
|
||||
- sizeof(ext_text),
|
||||
- r.info);
|
||||
- ext_len += msg_print_ext_body(ext_text + ext_len,
|
||||
- sizeof(ext_text) - ext_len,
|
||||
- &r.text_buf[0],
|
||||
- r.info->text_len,
|
||||
- &r.info->dev_info);
|
||||
- }
|
||||
- len = record_print_text(&r,
|
||||
- console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
- printk_time);
|
||||
- atomic64_set(&console_seq, seq + 1);
|
||||
-
|
||||
- /*
|
||||
- * While actively printing out messages, if another printk()
|
||||
- * were to occur on another CPU, it may wait for this one to
|
||||
- * finish. This task can not be preempted if there is a
|
||||
- * waiter waiting to take over.
|
||||
- */
|
||||
- console_lock_spinning_enable();
|
||||
-
|
||||
- stop_critical_timings(); /* don't trace print latency */
|
||||
- call_console_drivers(ext_text, ext_len, text, len);
|
||||
- start_critical_timings();
|
||||
-
|
||||
- if (console_lock_spinning_disable_and_check())
|
||||
- return;
|
||||
-
|
||||
- if (do_cond_resched)
|
||||
- cond_resched();
|
||||
- }
|
||||
-
|
||||
console_locked = 0;
|
||||
|
||||
up_console_sem();
|
||||
-
|
||||
- /*
|
||||
- * Someone could have filled up the buffer again, so re-check if there's
|
||||
- * something to flush. In case we cannot trylock the console_sem again,
|
||||
- * there's a new owner and the console_unlock() from them will do the
|
||||
- * flush, no worries.
|
||||
- */
|
||||
- retry = prb_read_valid(prb, atomic64_read(&console_seq), NULL);
|
||||
- if (retry && console_trylock())
|
||||
- goto again;
|
||||
}
|
||||
EXPORT_SYMBOL(console_unlock);
|
||||
|
||||
@@ -2864,18 +2621,20 @@ void console_unblank(void)
|
||||
*/
|
||||
void console_flush_on_panic(enum con_flush_mode mode)
|
||||
{
|
||||
- /*
|
||||
- * If someone else is holding the console lock, trylock will fail
|
||||
- * and may_schedule may be set. Ignore and proceed to unlock so
|
||||
- * that messages are flushed out. As this can be called from any
|
||||
- * context and we don't want to get preempted while flushing,
|
||||
- * ensure may_schedule is cleared.
|
||||
- */
|
||||
- console_trylock();
|
||||
+ struct console *c;
|
||||
+ u64 seq;
|
||||
+
|
||||
+ if (!console_trylock())
|
||||
+ return;
|
||||
+
|
||||
console_may_schedule = 0;
|
||||
|
||||
- if (mode == CONSOLE_REPLAY_ALL)
|
||||
- atomic64_set(&console_seq, prb_first_valid_seq(prb));
|
||||
+ if (mode == CONSOLE_REPLAY_ALL) {
|
||||
+ seq = prb_first_valid_seq(prb);
|
||||
+ for_each_console(c)
|
||||
+ atomic64_set(&c->printk_seq, seq);
|
||||
+ }
|
||||
+
|
||||
console_unlock();
|
||||
}
|
||||
|
||||
@@ -3010,7 +2769,6 @@ static int try_enable_new_console(struct
|
||||
*/
|
||||
void register_console(struct console *newcon)
|
||||
{
|
||||
- unsigned long flags;
|
||||
struct console *bcon = NULL;
|
||||
int err;
|
||||
|
||||
@@ -3034,6 +2792,8 @@ void register_console(struct console *ne
|
||||
}
|
||||
}
|
||||
|
||||
+ newcon->thread = NULL;
|
||||
+
|
||||
if (console_drivers && console_drivers->flags & CON_BOOT)
|
||||
bcon = console_drivers;
|
||||
|
||||
@@ -3098,27 +2858,12 @@ void register_console(struct console *ne
|
||||
if (newcon->flags & CON_EXTENDED)
|
||||
nr_ext_console_drivers++;
|
||||
|
||||
- if (newcon->flags & CON_PRINTBUFFER) {
|
||||
- /*
|
||||
- * console_unlock(); will print out the buffered messages
|
||||
- * for us.
|
||||
- *
|
||||
- * We're about to replay the log buffer. Only do this to the
|
||||
- * just-registered console to avoid excessive message spam to
|
||||
- * the already-registered consoles.
|
||||
- *
|
||||
- * Set exclusive_console with disabled interrupts to reduce
|
||||
- * race window with eventual console_flush_on_panic() that
|
||||
- * ignores console_lock.
|
||||
- */
|
||||
- exclusive_console = newcon;
|
||||
- exclusive_console_stop_seq = atomic64_read(&console_seq);
|
||||
+ if (newcon->flags & CON_PRINTBUFFER)
|
||||
+ atomic64_set(&newcon->printk_seq, 0);
|
||||
+ else
|
||||
+ atomic64_set(&newcon->printk_seq, prb_next_seq(prb));
|
||||
|
||||
- /* Get a consistent copy of @syslog_seq. */
|
||||
- spin_lock_irqsave(&syslog_lock, flags);
|
||||
- atomic64_set(&console_seq, syslog_seq);
|
||||
- spin_unlock_irqrestore(&syslog_lock, flags);
|
||||
- }
|
||||
+ console_try_thread(newcon);
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
|
||||
@@ -3192,6 +2937,9 @@ int unregister_console(struct console *c
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
|
||||
+ if (console->thread && !IS_ERR(console->thread))
|
||||
+ kthread_stop(console->thread);
|
||||
+
|
||||
if (console->exit)
|
||||
res = console->exit(console);
|
||||
|
||||
@@ -3274,6 +3022,15 @@ static int __init printk_late_init(void)
|
||||
unregister_console(con);
|
||||
}
|
||||
}
|
||||
+
|
||||
+#ifdef CONFIG_PRINTK
|
||||
+ console_lock();
|
||||
+ for_each_console(con)
|
||||
+ start_printk_kthread(con);
|
||||
+ kthreads_started = true;
|
||||
+ console_unlock();
|
||||
+#endif
|
||||
+
|
||||
ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL,
|
||||
console_cpu_notify);
|
||||
WARN_ON(ret < 0);
|
||||
@@ -3289,7 +3046,6 @@ late_initcall(printk_late_init);
|
||||
* Delayed printk version, for scheduler-internal messages:
|
||||
*/
|
||||
#define PRINTK_PENDING_WAKEUP 0x01
|
||||
-#define PRINTK_PENDING_OUTPUT 0x02
|
||||
|
||||
static DEFINE_PER_CPU(int, printk_pending);
|
||||
|
||||
@@ -3297,14 +3053,8 @@ static void wake_up_klogd_work_func(stru
|
||||
{
|
||||
int pending = __this_cpu_xchg(printk_pending, 0);
|
||||
|
||||
- if (pending & PRINTK_PENDING_OUTPUT) {
|
||||
- /* If trylock fails, someone else is doing the printing */
|
||||
- if (console_trylock())
|
||||
- console_unlock();
|
||||
- }
|
||||
-
|
||||
if (pending & PRINTK_PENDING_WAKEUP)
|
||||
- wake_up_interruptible(&log_wait);
|
||||
+ wake_up_interruptible_all(&log_wait);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) =
|
||||
@@ -3325,13 +3075,6 @@ void wake_up_klogd(void)
|
||||
|
||||
void defer_console_output(void)
|
||||
{
|
||||
- if (!printk_percpu_data_ready())
|
||||
- return;
|
||||
-
|
||||
- preempt_disable();
|
||||
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
|
||||
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
|
||||
- preempt_enable();
|
||||
}
|
||||
|
||||
int vprintk_deferred(const char *fmt, va_list args)
|
||||
@@ -0,0 +1,407 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:08 +0106
|
||||
Subject: [PATCH 26/28] printk: remove deferred printing
|
||||
|
||||
Since printing occurs either atomically or from the printing
|
||||
kthread, there is no need for any deferring or tracking possible
|
||||
recursion paths. Remove all printk context tracking.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm/kernel/smp.c | 2 -
|
||||
arch/powerpc/kexec/crash.c | 3 -
|
||||
include/linux/hardirq.h | 2 -
|
||||
include/linux/printk.h | 12 ------
|
||||
kernel/printk/Makefile | 1
|
||||
kernel/printk/internal.h | 70 -----------------------------------
|
||||
kernel/printk/printk.c | 58 +++++++++++------------------
|
||||
kernel/printk/printk_safe.c | 88 --------------------------------------------
|
||||
kernel/trace/trace.c | 2 -
|
||||
9 files changed, 22 insertions(+), 216 deletions(-)
|
||||
delete mode 100644 kernel/printk/internal.h
|
||||
delete mode 100644 kernel/printk/printk_safe.c
|
||||
|
||||
--- a/arch/arm/kernel/smp.c
|
||||
+++ b/arch/arm/kernel/smp.c
|
||||
@@ -671,9 +671,7 @@ static void do_handle_IPI(int ipinr)
|
||||
break;
|
||||
|
||||
case IPI_CPU_BACKTRACE:
|
||||
- printk_nmi_enter();
|
||||
nmi_cpu_backtrace(get_irq_regs());
|
||||
- printk_nmi_exit();
|
||||
break;
|
||||
|
||||
default:
|
||||
--- a/arch/powerpc/kexec/crash.c
|
||||
+++ b/arch/powerpc/kexec/crash.c
|
||||
@@ -311,9 +311,6 @@ void default_machine_crash_shutdown(stru
|
||||
unsigned int i;
|
||||
int (*old_handler)(struct pt_regs *regs);
|
||||
|
||||
- /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */
|
||||
- printk_nmi_enter();
|
||||
-
|
||||
/*
|
||||
* This function is only called after the system
|
||||
* has panicked or is otherwise in a critical state.
|
||||
--- a/include/linux/hardirq.h
|
||||
+++ b/include/linux/hardirq.h
|
||||
@@ -115,7 +115,6 @@ extern void rcu_nmi_exit(void);
|
||||
do { \
|
||||
lockdep_off(); \
|
||||
arch_nmi_enter(); \
|
||||
- printk_nmi_enter(); \
|
||||
BUG_ON(in_nmi() == NMI_MASK); \
|
||||
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
} while (0)
|
||||
@@ -134,7 +133,6 @@ extern void rcu_nmi_exit(void);
|
||||
do { \
|
||||
BUG_ON(!in_nmi()); \
|
||||
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
- printk_nmi_exit(); \
|
||||
arch_nmi_exit(); \
|
||||
lockdep_on(); \
|
||||
} while (0)
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -155,18 +155,6 @@ static inline __printf(1, 2) __cold
|
||||
void early_printk(const char *s, ...) { }
|
||||
#endif
|
||||
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-extern void printk_nmi_enter(void);
|
||||
-extern void printk_nmi_exit(void);
|
||||
-extern void printk_nmi_direct_enter(void);
|
||||
-extern void printk_nmi_direct_exit(void);
|
||||
-#else
|
||||
-static inline void printk_nmi_enter(void) { }
|
||||
-static inline void printk_nmi_exit(void) { }
|
||||
-static inline void printk_nmi_direct_enter(void) { }
|
||||
-static inline void printk_nmi_direct_exit(void) { }
|
||||
-#endif /* PRINTK_NMI */
|
||||
-
|
||||
struct dev_printk_info;
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
--- a/kernel/printk/Makefile
|
||||
+++ b/kernel/printk/Makefile
|
||||
@@ -1,5 +1,4 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-y = printk.o
|
||||
-obj-$(CONFIG_PRINTK) += printk_safe.o
|
||||
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
|
||||
obj-$(CONFIG_PRINTK) += printk_ringbuffer.o
|
||||
--- a/kernel/printk/internal.h
|
||||
+++ /dev/null
|
||||
@@ -1,70 +0,0 @@
|
||||
-/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
-/*
|
||||
- * internal.h - printk internal definitions
|
||||
- */
|
||||
-#include <linux/percpu.h>
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK
|
||||
-
|
||||
-#define PRINTK_SAFE_CONTEXT_MASK 0x007ffffff
|
||||
-#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x008000000
|
||||
-#define PRINTK_NMI_CONTEXT_MASK 0xff0000000
|
||||
-
|
||||
-#define PRINTK_NMI_CONTEXT_OFFSET 0x010000000
|
||||
-
|
||||
-__printf(4, 0)
|
||||
-int vprintk_store(int facility, int level,
|
||||
- const struct dev_printk_info *dev_info,
|
||||
- const char *fmt, va_list args);
|
||||
-
|
||||
-__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
|
||||
-__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
|
||||
-void __printk_safe_enter(void);
|
||||
-void __printk_safe_exit(void);
|
||||
-
|
||||
-bool printk_percpu_data_ready(void);
|
||||
-
|
||||
-#define printk_safe_enter_irqsave(flags) \
|
||||
- do { \
|
||||
- local_irq_save(flags); \
|
||||
- __printk_safe_enter(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_exit_irqrestore(flags) \
|
||||
- do { \
|
||||
- __printk_safe_exit(); \
|
||||
- local_irq_restore(flags); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_enter_irq() \
|
||||
- do { \
|
||||
- local_irq_disable(); \
|
||||
- __printk_safe_enter(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_exit_irq() \
|
||||
- do { \
|
||||
- __printk_safe_exit(); \
|
||||
- local_irq_enable(); \
|
||||
- } while (0)
|
||||
-
|
||||
-void defer_console_output(void);
|
||||
-
|
||||
-#else
|
||||
-
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
|
||||
-
|
||||
-/*
|
||||
- * In !PRINTK builds we still export console_sem
|
||||
- * semaphore and some of console functions (console_unlock()/etc.), so
|
||||
- * printk-safe must preserve the existing local IRQ guarantees.
|
||||
- */
|
||||
-#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
|
||||
-#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
|
||||
-
|
||||
-#define printk_safe_enter_irq() local_irq_disable()
|
||||
-#define printk_safe_exit_irq() local_irq_enable()
|
||||
-
|
||||
-static inline bool printk_percpu_data_ready(void) { return false; }
|
||||
-#endif /* CONFIG_PRINTK */
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -45,6 +45,7 @@
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/kthread.h>
|
||||
+#include <linux/kdb.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/debug.h>
|
||||
@@ -60,7 +61,6 @@
|
||||
#include "printk_ringbuffer.h"
|
||||
#include "console_cmdline.h"
|
||||
#include "braille.h"
|
||||
-#include "internal.h"
|
||||
|
||||
int console_printk[4] = {
|
||||
CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
|
||||
@@ -227,19 +227,7 @@ static int nr_ext_console_drivers;
|
||||
|
||||
static int __down_trylock_console_sem(unsigned long ip)
|
||||
{
|
||||
- int lock_failed;
|
||||
- unsigned long flags;
|
||||
-
|
||||
- /*
|
||||
- * Here and in __up_console_sem() we need to be in safe mode,
|
||||
- * because spindump/WARN/etc from under console ->lock will
|
||||
- * deadlock in printk()->down_trylock_console_sem() otherwise.
|
||||
- */
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- lock_failed = down_trylock(&console_sem);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
- if (lock_failed)
|
||||
+ if (down_trylock(&console_sem))
|
||||
return 1;
|
||||
mutex_acquire(&console_lock_dep_map, 0, 1, ip);
|
||||
return 0;
|
||||
@@ -248,13 +236,9 @@ static int __down_trylock_console_sem(un
|
||||
|
||||
static void __up_console_sem(unsigned long ip)
|
||||
{
|
||||
- unsigned long flags;
|
||||
-
|
||||
mutex_release(&console_lock_dep_map, ip);
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
up(&console_sem);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
#define up_console_sem() __up_console_sem(_RET_IP_)
|
||||
|
||||
@@ -426,7 +410,7 @@ static struct printk_ringbuffer *prb = &
|
||||
*/
|
||||
static bool __printk_percpu_data_ready __read_mostly;
|
||||
|
||||
-bool printk_percpu_data_ready(void)
|
||||
+static bool printk_percpu_data_ready(void)
|
||||
{
|
||||
return __printk_percpu_data_ready;
|
||||
}
|
||||
@@ -1060,7 +1044,6 @@ void __init setup_log_buf(int early)
|
||||
struct printk_record r;
|
||||
size_t new_descs_size;
|
||||
size_t new_infos_size;
|
||||
- unsigned long flags;
|
||||
char *new_log_buf;
|
||||
unsigned int free;
|
||||
u64 seq;
|
||||
@@ -1958,9 +1941,9 @@ static u16 printk_sprint(char *text, u16
|
||||
}
|
||||
|
||||
__printf(4, 0)
|
||||
-int vprintk_store(int facility, int level,
|
||||
- const struct dev_printk_info *dev_info,
|
||||
- const char *fmt, va_list args)
|
||||
+static int vprintk_store(int facility, int level,
|
||||
+ const struct dev_printk_info *dev_info,
|
||||
+ const char *fmt, va_list args)
|
||||
{
|
||||
const u32 caller_id = printk_caller_id();
|
||||
struct prb_reserved_entry e;
|
||||
@@ -2106,11 +2089,22 @@ asmlinkage int vprintk_emit(int facility
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
|
||||
-int vprintk_default(const char *fmt, va_list args)
|
||||
+__printf(1, 0)
|
||||
+static int vprintk_default(const char *fmt, va_list args)
|
||||
{
|
||||
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
||||
}
|
||||
-EXPORT_SYMBOL_GPL(vprintk_default);
|
||||
+
|
||||
+__printf(1, 0)
|
||||
+static int vprintk_func(const char *fmt, va_list args)
|
||||
+{
|
||||
+#ifdef CONFIG_KGDB_KDB
|
||||
+ /* Allow to pass printk() to kdb but avoid a recursion. */
|
||||
+ if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
|
||||
+ return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
|
||||
+#endif
|
||||
+ return vprintk_default(fmt, args);
|
||||
+}
|
||||
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
{
|
||||
@@ -3073,18 +3067,10 @@ void wake_up_klogd(void)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
-void defer_console_output(void)
|
||||
+__printf(1, 0)
|
||||
+static int vprintk_deferred(const char *fmt, va_list args)
|
||||
{
|
||||
-}
|
||||
-
|
||||
-int vprintk_deferred(const char *fmt, va_list args)
|
||||
-{
|
||||
- int r;
|
||||
-
|
||||
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args);
|
||||
- defer_console_output();
|
||||
-
|
||||
- return r;
|
||||
+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
||||
}
|
||||
|
||||
int printk_deferred(const char *fmt, ...)
|
||||
--- a/kernel/printk/printk_safe.c
|
||||
+++ /dev/null
|
||||
@@ -1,88 +0,0 @@
|
||||
-// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
-/*
|
||||
- * printk_safe.c - Safe printk for printk-deadlock-prone contexts
|
||||
- */
|
||||
-
|
||||
-#include <linux/preempt.h>
|
||||
-#include <linux/spinlock.h>
|
||||
-#include <linux/debug_locks.h>
|
||||
-#include <linux/kdb.h>
|
||||
-#include <linux/smp.h>
|
||||
-#include <linux/cpumask.h>
|
||||
-#include <linux/irq_work.h>
|
||||
-#include <linux/printk.h>
|
||||
-#include <linux/kprobes.h>
|
||||
-
|
||||
-#include "internal.h"
|
||||
-
|
||||
-static DEFINE_PER_CPU(int, printk_context);
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-void noinstr printk_nmi_enter(void)
|
||||
-{
|
||||
- this_cpu_add(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
|
||||
-}
|
||||
-
|
||||
-void noinstr printk_nmi_exit(void)
|
||||
-{
|
||||
- this_cpu_sub(printk_context, PRINTK_NMI_CONTEXT_OFFSET);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Marks a code that might produce many messages in NMI context
|
||||
- * and the risk of losing them is more critical than eventual
|
||||
- * reordering.
|
||||
- */
|
||||
-void printk_nmi_direct_enter(void)
|
||||
-{
|
||||
- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
|
||||
- this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-void printk_nmi_direct_exit(void)
|
||||
-{
|
||||
- this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-#endif /* CONFIG_PRINTK_NMI */
|
||||
-
|
||||
-/* Can be preempted by NMI. */
|
||||
-void __printk_safe_enter(void)
|
||||
-{
|
||||
- this_cpu_inc(printk_context);
|
||||
-}
|
||||
-
|
||||
-/* Can be preempted by NMI. */
|
||||
-void __printk_safe_exit(void)
|
||||
-{
|
||||
- this_cpu_dec(printk_context);
|
||||
-}
|
||||
-
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
|
||||
-{
|
||||
-#ifdef CONFIG_KGDB_KDB
|
||||
- /* Allow to pass printk() to kdb but avoid a recursion. */
|
||||
- if (unlikely(kdb_trap_printk && kdb_printf_cpu < 0))
|
||||
- return vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
|
||||
-#endif
|
||||
-
|
||||
- /*
|
||||
- * Use the main logbuf even in NMI. But avoid calling console
|
||||
- * drivers that might have their own locks.
|
||||
- */
|
||||
- if (this_cpu_read(printk_context) &
|
||||
- (PRINTK_NMI_DIRECT_CONTEXT_MASK |
|
||||
- PRINTK_NMI_CONTEXT_MASK |
|
||||
- PRINTK_SAFE_CONTEXT_MASK)) {
|
||||
- int len;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- defer_console_output();
|
||||
- return len;
|
||||
- }
|
||||
-
|
||||
- /* No obstacles. */
|
||||
- return vprintk_default(fmt, args);
|
||||
-}
|
||||
--- a/kernel/trace/trace.c
|
||||
+++ b/kernel/trace/trace.c
|
||||
@@ -9325,7 +9325,6 @@ void ftrace_dump(enum ftrace_dump_mode o
|
||||
tracing_off();
|
||||
|
||||
local_irq_save(flags);
|
||||
- printk_nmi_direct_enter();
|
||||
|
||||
/* Simulate the iterator */
|
||||
trace_init_global_iter(&iter);
|
||||
@@ -9405,7 +9404,6 @@ void ftrace_dump(enum ftrace_dump_mode o
|
||||
atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
|
||||
}
|
||||
atomic_dec(&dump_running);
|
||||
- printk_nmi_direct_exit();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ftrace_dump);
|
||||
@@ -0,0 +1,67 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:09 +0106
|
||||
Subject: [PATCH 27/28] printk: add console handover
|
||||
|
||||
If earlyprintk is used, a boot console will print directly to the
|
||||
console immediately. The boot console will unregister itself as soon
|
||||
as a non-boot console registers. However, the non-boot console does
|
||||
not begin printing until its kthread has started. Since this happens
|
||||
much later, there is a long pause in the console output. If the
|
||||
ringbuffer is small, messages could even be dropped during the
|
||||
pause.
|
||||
|
||||
Add a new CON_HANDOVER console flag to be used internally by printk
|
||||
in order to track which non-boot console took over from a boot
|
||||
console. If handover consoles have implemented write_atomic(), they
|
||||
are allowed to print directly to the console until their kthread can
|
||||
take over.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/console.h | 1 +
|
||||
kernel/printk/printk.c | 8 +++++++-
|
||||
2 files changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -137,6 +137,7 @@ static inline int con_debug_leave(void)
|
||||
#define CON_ANYTIME (16) /* Safe to call when cpu is offline */
|
||||
#define CON_BRL (32) /* Used for a braille device */
|
||||
#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */
|
||||
+#define CON_HANDOVER (128) /* Device was previously a boot console. */
|
||||
|
||||
struct console {
|
||||
char name[16];
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1726,6 +1726,8 @@ static bool console_can_sync(struct cons
|
||||
return false;
|
||||
if (con->write_atomic && kernel_sync_mode())
|
||||
return true;
|
||||
+ if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
|
||||
+ return true;
|
||||
if (con->write && (con->flags & CON_BOOT) && !con->thread)
|
||||
return true;
|
||||
return false;
|
||||
@@ -1737,6 +1739,8 @@ static bool call_sync_console_driver(str
|
||||
return false;
|
||||
if (con->write_atomic && kernel_sync_mode())
|
||||
con->write_atomic(con, text, text_len);
|
||||
+ else if (con->write_atomic && (con->flags & CON_HANDOVER) && !con->thread)
|
||||
+ con->write_atomic(con, text, text_len);
|
||||
else if (con->write && (con->flags & CON_BOOT) && !con->thread)
|
||||
con->write(con, text, text_len);
|
||||
else
|
||||
@@ -2829,8 +2833,10 @@ void register_console(struct console *ne
|
||||
* the real console are the same physical device, it's annoying to
|
||||
* see the beginning boot messages twice
|
||||
*/
|
||||
- if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV))
|
||||
+ if (bcon && ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
|
||||
newcon->flags &= ~CON_PRINTBUFFER;
|
||||
+ newcon->flags |= CON_HANDOVER;
|
||||
+ }
|
||||
|
||||
/*
|
||||
* Put this console in the list - keep the
|
||||
198
kernel/patches-5.11.x-rt/0047-0028-printk-add-pr_flush.patch
Normal file
198
kernel/patches-5.11.x-rt/0047-0028-printk-add-pr_flush.patch
Normal file
@@ -0,0 +1,198 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Mon, 30 Nov 2020 01:42:10 +0106
|
||||
Subject: [PATCH 28/28] printk: add pr_flush()
|
||||
|
||||
Provide a function to allow waiting for console printers to catch
|
||||
up to the latest logged message.
|
||||
|
||||
Use pr_flush() to give console printers a chance to finish in
|
||||
critical situations if no atomic console is available. For now
|
||||
pr_flush() is only used in the most common error paths:
|
||||
panic(), print_oops_end_marker(), report_bug(), kmsg_dump().
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/printk.h | 2 +
|
||||
kernel/panic.c | 28 ++++++++++-------
|
||||
kernel/printk/printk.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
lib/bug.c | 1
|
||||
4 files changed, 99 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -481,6 +481,8 @@ extern int kptr_restrict;
|
||||
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
+bool pr_flush(int timeout_ms, bool reset_on_progress);
|
||||
+
|
||||
/*
|
||||
* ratelimited messages with local ratelimit_state,
|
||||
* no local ratelimit_state used in the !PRINTK case
|
||||
--- a/kernel/panic.c
|
||||
+++ b/kernel/panic.c
|
||||
@@ -177,12 +177,28 @@ static void panic_print_sys_info(void)
|
||||
void panic(const char *fmt, ...)
|
||||
{
|
||||
static char buf[1024];
|
||||
+ va_list args2;
|
||||
va_list args;
|
||||
long i, i_next = 0, len;
|
||||
int state = 0;
|
||||
int old_cpu, this_cpu;
|
||||
bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
|
||||
|
||||
+ console_verbose();
|
||||
+ pr_emerg("Kernel panic - not syncing:\n");
|
||||
+ va_start(args2, fmt);
|
||||
+ va_copy(args, args2);
|
||||
+ vprintk(fmt, args2);
|
||||
+ va_end(args2);
|
||||
+#ifdef CONFIG_DEBUG_BUGVERBOSE
|
||||
+ /*
|
||||
+ * Avoid nested stack-dumping if a panic occurs during oops processing
|
||||
+ */
|
||||
+ if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
|
||||
+ dump_stack();
|
||||
+#endif
|
||||
+ pr_flush(1000, true);
|
||||
+
|
||||
/*
|
||||
* Disable local interrupts. This will prevent panic_smp_self_stop
|
||||
* from deadlocking the first cpu that invokes the panic, since
|
||||
@@ -213,24 +229,13 @@ void panic(const char *fmt, ...)
|
||||
if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
|
||||
panic_smp_self_stop();
|
||||
|
||||
- console_verbose();
|
||||
bust_spinlocks(1);
|
||||
- va_start(args, fmt);
|
||||
len = vscnprintf(buf, sizeof(buf), fmt, args);
|
||||
va_end(args);
|
||||
|
||||
if (len && buf[len - 1] == '\n')
|
||||
buf[len - 1] = '\0';
|
||||
|
||||
- pr_emerg("Kernel panic - not syncing: %s\n", buf);
|
||||
-#ifdef CONFIG_DEBUG_BUGVERBOSE
|
||||
- /*
|
||||
- * Avoid nested stack-dumping if a panic occurs during oops processing
|
||||
- */
|
||||
- if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
|
||||
- dump_stack();
|
||||
-#endif
|
||||
-
|
||||
/*
|
||||
* If kgdb is enabled, give it a chance to run before we stop all
|
||||
* the other CPUs or else we won't be able to debug processes left
|
||||
@@ -552,6 +557,7 @@ static void print_oops_end_marker(void)
|
||||
{
|
||||
init_oops_id();
|
||||
pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
|
||||
+ pr_flush(1000, true);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3228,6 +3228,12 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
sync_mode = true;
|
||||
pr_info("enabled sync mode\n");
|
||||
}
|
||||
+
|
||||
+ /*
|
||||
+ * Give the printing threads time to flush, allowing up to
|
||||
+ * 1s of no printing forward progress before giving up.
|
||||
+ */
|
||||
+ pr_flush(1000, true);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -3507,3 +3513,76 @@ void console_atomic_unlock(unsigned int
|
||||
prb_unlock(&printk_cpulock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(console_atomic_unlock);
|
||||
+
|
||||
+static void pr_msleep(bool may_sleep, int ms)
|
||||
+{
|
||||
+ if (may_sleep) {
|
||||
+ msleep(ms);
|
||||
+ } else {
|
||||
+ while (ms--)
|
||||
+ udelay(1000);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * pr_flush() - Wait for printing threads to catch up.
|
||||
+ *
|
||||
+ * @timeout_ms: The maximum time (in ms) to wait.
|
||||
+ * @reset_on_progress: Reset the timeout if forward progress is seen.
|
||||
+ *
|
||||
+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1
|
||||
+ * represents infinite waiting.
|
||||
+ *
|
||||
+ * If @reset_on_progress is true, the timeout will be reset whenever any
|
||||
+ * printer has been seen to make some forward progress.
|
||||
+ *
|
||||
+ * Context: Any context.
|
||||
+ * Return: true if all enabled printers are caught up.
|
||||
+ */
|
||||
+bool pr_flush(int timeout_ms, bool reset_on_progress)
|
||||
+{
|
||||
+ int remaining = timeout_ms;
|
||||
+ struct console *con;
|
||||
+ u64 last_diff = 0;
|
||||
+ bool may_sleep;
|
||||
+ u64 printk_seq;
|
||||
+ u64 diff;
|
||||
+ u64 seq;
|
||||
+
|
||||
+ may_sleep = (preemptible() && !in_softirq());
|
||||
+
|
||||
+ seq = prb_next_seq(prb);
|
||||
+
|
||||
+ for (;;) {
|
||||
+ diff = 0;
|
||||
+
|
||||
+ for_each_console(con) {
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ continue;
|
||||
+ printk_seq = atomic64_read(&con->printk_seq);
|
||||
+ if (printk_seq < seq)
|
||||
+ diff += seq - printk_seq;
|
||||
+ }
|
||||
+
|
||||
+ if (diff != last_diff && reset_on_progress)
|
||||
+ remaining = timeout_ms;
|
||||
+
|
||||
+ if (!diff || remaining == 0)
|
||||
+ break;
|
||||
+
|
||||
+ if (remaining < 0) {
|
||||
+ pr_msleep(may_sleep, 100);
|
||||
+ } else if (remaining < 100) {
|
||||
+ pr_msleep(may_sleep, remaining);
|
||||
+ remaining = 0;
|
||||
+ } else {
|
||||
+ pr_msleep(may_sleep, 100);
|
||||
+ remaining -= 100;
|
||||
+ }
|
||||
+
|
||||
+ last_diff = diff;
|
||||
+ }
|
||||
+
|
||||
+ return (diff == 0);
|
||||
+}
|
||||
+EXPORT_SYMBOL(pr_flush);
|
||||
--- a/lib/bug.c
|
||||
+++ b/lib/bug.c
|
||||
@@ -205,6 +205,7 @@ enum bug_trap_type report_bug(unsigned l
|
||||
else
|
||||
pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
|
||||
(void *)bugaddr);
|
||||
+ pr_flush(1000, true);
|
||||
|
||||
return BUG_TRAP_TYPE_BUG;
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 18 Feb 2021 18:31:26 +0100
|
||||
Subject: [PATCH] kcov: Remove kcov include from sched.h and move it to its
|
||||
users.
|
||||
|
||||
The recent addition of in_serving_softirq() to kcov.h results in
|
||||
compile failure on PREEMPT_RT because it requires
|
||||
task_struct::softirq_disable_cnt. This is not available if kcov.h is
|
||||
included from sched.h.
|
||||
|
||||
There is no need to include kcov.h from sched.h. All but the net/ users
|
||||
already include the kcov header file.
|
||||
|
||||
Move the include of the kcov.h header from sched.h to its users.
|
||||
Additionally include sched.h from kcov.h to ensure that everything
|
||||
task_struct related is available.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/kcov.h | 1 +
|
||||
include/linux/sched.h | 1 -
|
||||
net/core/skbuff.c | 1 +
|
||||
net/mac80211/iface.c | 1 +
|
||||
net/mac80211/rx.c | 1 +
|
||||
5 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/kcov.h
|
||||
+++ b/include/linux/kcov.h
|
||||
@@ -2,6 +2,7 @@
|
||||
#ifndef _LINUX_KCOV_H
|
||||
#define _LINUX_KCOV_H
|
||||
|
||||
+#include <linux/sched.h>
|
||||
#include <uapi/linux/kcov.h>
|
||||
|
||||
struct task_struct;
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -14,7 +14,6 @@
|
||||
#include <linux/pid.h>
|
||||
#include <linux/sem.h>
|
||||
#include <linux/shm.h>
|
||||
-#include <linux/kcov.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/plist.h>
|
||||
#include <linux/hrtimer.h>
|
||||
--- a/net/core/skbuff.c
|
||||
+++ b/net/core/skbuff.c
|
||||
@@ -60,6 +60,7 @@
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/mpls.h>
|
||||
+#include <linux/kcov.h>
|
||||
|
||||
#include <net/protocol.h>
|
||||
#include <net/dst.h>
|
||||
--- a/net/mac80211/iface.c
|
||||
+++ b/net/mac80211/iface.c
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/if_arp.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
+#include <linux/kcov.h>
|
||||
#include <net/mac80211.h>
|
||||
#include <net/ieee80211_radiotap.h>
|
||||
#include "ieee80211_i.h"
|
||||
--- a/net/mac80211/rx.c
|
||||
+++ b/net/mac80211/rx.c
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/export.h>
|
||||
+#include <linux/kcov.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <net/mac80211.h>
|
||||
#include <net/ieee80211_radiotap.h>
|
||||
@@ -0,0 +1,43 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 3 Jul 2018 18:19:48 +0200
|
||||
Subject: [PATCH] cgroup: use irqsave in cgroup_rstat_flush_locked()
|
||||
|
||||
All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock
|
||||
either with spin_lock_irq() or spin_lock_irqsave().
|
||||
cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which
|
||||
is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated()
|
||||
in IRQ context and therefore requires _irqsave() locking suffix in
|
||||
cgroup_rstat_flush_locked().
|
||||
Since there is no difference between spin_lock_t and raw_spin_lock_t
|
||||
on !RT lockdep does not complain here. On RT lockdep complains because
|
||||
the interrupts were not disabled here and a deadlock is possible.
|
||||
|
||||
Acquire the raw_spin_lock_t with disabled interrupts.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cgroup/rstat.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -149,8 +149,9 @@ static void cgroup_rstat_flush_locked(st
|
||||
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock,
|
||||
cpu);
|
||||
struct cgroup *pos = NULL;
|
||||
+ unsigned long flags;
|
||||
|
||||
- raw_spin_lock(cpu_lock);
|
||||
+ raw_spin_lock_irqsave(cpu_lock, flags);
|
||||
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
|
||||
struct cgroup_subsys_state *css;
|
||||
|
||||
@@ -162,7 +163,7 @@ static void cgroup_rstat_flush_locked(st
|
||||
css->ss->css_rstat_flush(css, cpu);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
- raw_spin_unlock(cpu_lock);
|
||||
+ raw_spin_unlock_irqrestore(cpu_lock, flags);
|
||||
|
||||
/* if @may_sleep, play nice and yield if necessary */
|
||||
if (may_sleep && (need_resched() ||
|
||||
@@ -20,7 +20,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
--- a/mm/workingset.c
|
||||
+++ b/mm/workingset.c
|
||||
@@ -367,6 +367,8 @@ static struct list_lru shadow_nodes;
|
||||
@@ -430,6 +430,8 @@ static struct list_lru shadow_nodes;
|
||||
|
||||
void workingset_update_node(struct xa_node *node)
|
||||
{
|
||||
@@ -29,7 +29,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Track non-empty nodes that contain only shadow entries;
|
||||
* unlink those that contain pages or are being freed.
|
||||
@@ -375,7 +377,8 @@ void workingset_update_node(struct xa_no
|
||||
@@ -438,7 +440,8 @@ void workingset_update_node(struct xa_no
|
||||
* already where they should be. The list_empty() test is safe
|
||||
* as node->private_list is protected by the i_pages lock.
|
||||
*/
|
||||
@@ -0,0 +1,138 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 14 Aug 2020 18:53:34 +0200
|
||||
Subject: [PATCH] shmem: Use raw_spinlock_t for ->stat_lock
|
||||
|
||||
Each CPU has SHMEM_INO_BATCH inodes available in `->ino_batch' which is
|
||||
per-CPU. Access here is serialized by disabling preemption. If the pool is
|
||||
empty, it gets reloaded from `->next_ino'. Access here is serialized by
|
||||
->stat_lock which is a spinlock_t and can not be acquired with disabled
|
||||
preemption.
|
||||
One way around it would be to make the per-CPU ino_batch a struct containing
|
||||
the inode number and a local_lock_t.
|
||||
Another solution is to promote ->stat_lock to a raw_spinlock_t. The critical
|
||||
sections are short. The mpol_put() should be moved outside of the critical
|
||||
section to avoid invoking the destructor with disabled preemption.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/shmem_fs.h | 2 +-
|
||||
mm/shmem.c | 31 +++++++++++++++++--------------
|
||||
2 files changed, 18 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/include/linux/shmem_fs.h
|
||||
+++ b/include/linux/shmem_fs.h
|
||||
@@ -31,7 +31,7 @@ struct shmem_sb_info {
|
||||
struct percpu_counter used_blocks; /* How many are allocated */
|
||||
unsigned long max_inodes; /* How many inodes are allowed */
|
||||
unsigned long free_inodes; /* How many are left for allocation */
|
||||
- spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
|
||||
+ raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
|
||||
umode_t mode; /* Mount mode for root directory */
|
||||
unsigned char huge; /* Whether to try for hugepages */
|
||||
kuid_t uid; /* Mount uid for root directory */
|
||||
--- a/mm/shmem.c
|
||||
+++ b/mm/shmem.c
|
||||
@@ -278,10 +278,10 @@ static int shmem_reserve_inode(struct su
|
||||
ino_t ino;
|
||||
|
||||
if (!(sb->s_flags & SB_KERNMOUNT)) {
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
+ raw_spin_lock(&sbinfo->stat_lock);
|
||||
if (sbinfo->max_inodes) {
|
||||
if (!sbinfo->free_inodes) {
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
return -ENOSPC;
|
||||
}
|
||||
sbinfo->free_inodes--;
|
||||
@@ -304,7 +304,7 @@ static int shmem_reserve_inode(struct su
|
||||
}
|
||||
*inop = ino;
|
||||
}
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
} else if (inop) {
|
||||
/*
|
||||
* __shmem_file_setup, one of our callers, is lock-free: it
|
||||
@@ -319,13 +319,14 @@ static int shmem_reserve_inode(struct su
|
||||
* to worry about things like glibc compatibility.
|
||||
*/
|
||||
ino_t *next_ino;
|
||||
+
|
||||
next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
|
||||
ino = *next_ino;
|
||||
if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
+ raw_spin_lock(&sbinfo->stat_lock);
|
||||
ino = sbinfo->next_ino;
|
||||
sbinfo->next_ino += SHMEM_INO_BATCH;
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
if (unlikely(is_zero_ino(ino)))
|
||||
ino++;
|
||||
}
|
||||
@@ -341,9 +342,9 @@ static void shmem_free_inode(struct supe
|
||||
{
|
||||
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
|
||||
if (sbinfo->max_inodes) {
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
+ raw_spin_lock(&sbinfo->stat_lock);
|
||||
sbinfo->free_inodes++;
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1479,10 +1480,10 @@ static struct mempolicy *shmem_get_sbmpo
|
||||
{
|
||||
struct mempolicy *mpol = NULL;
|
||||
if (sbinfo->mpol) {
|
||||
- spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
|
||||
+ raw_spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
|
||||
mpol = sbinfo->mpol;
|
||||
mpol_get(mpol);
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
}
|
||||
return mpol;
|
||||
}
|
||||
@@ -3587,9 +3588,10 @@ static int shmem_reconfigure(struct fs_c
|
||||
struct shmem_options *ctx = fc->fs_private;
|
||||
struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
|
||||
unsigned long inodes;
|
||||
+ struct mempolicy *mpol = NULL;
|
||||
const char *err;
|
||||
|
||||
- spin_lock(&sbinfo->stat_lock);
|
||||
+ raw_spin_lock(&sbinfo->stat_lock);
|
||||
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
|
||||
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
|
||||
if (!sbinfo->max_blocks) {
|
||||
@@ -3634,14 +3636,15 @@ static int shmem_reconfigure(struct fs_c
|
||||
* Preserve previous mempolicy unless mpol remount option was specified.
|
||||
*/
|
||||
if (ctx->mpol) {
|
||||
- mpol_put(sbinfo->mpol);
|
||||
+ mpol = sbinfo->mpol;
|
||||
sbinfo->mpol = ctx->mpol; /* transfers initial ref */
|
||||
ctx->mpol = NULL;
|
||||
}
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
+ mpol_put(mpol);
|
||||
return 0;
|
||||
out:
|
||||
- spin_unlock(&sbinfo->stat_lock);
|
||||
+ raw_spin_unlock(&sbinfo->stat_lock);
|
||||
return invalfc(fc, "%s", err);
|
||||
}
|
||||
|
||||
@@ -3758,7 +3761,7 @@ static int shmem_fill_super(struct super
|
||||
sbinfo->mpol = ctx->mpol;
|
||||
ctx->mpol = NULL;
|
||||
|
||||
- spin_lock_init(&sbinfo->stat_lock);
|
||||
+ raw_spin_lock_init(&sbinfo->stat_lock);
|
||||
if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
|
||||
goto failed;
|
||||
spin_lock_init(&sbinfo->shrinklist_lock);
|
||||
@@ -0,0 +1,40 @@
|
||||
Subject: net: Move lockdep where it belongs
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 08 Sep 2020 07:32:20 +0200
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
net/core/sock.c | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/net/core/sock.c
|
||||
+++ b/net/core/sock.c
|
||||
@@ -3050,12 +3050,11 @@ void lock_sock_nested(struct sock *sk, i
|
||||
if (sk->sk_lock.owned)
|
||||
__lock_sock(sk);
|
||||
sk->sk_lock.owned = 1;
|
||||
- spin_unlock(&sk->sk_lock.slock);
|
||||
+ spin_unlock_bh(&sk->sk_lock.slock);
|
||||
/*
|
||||
* The sk_lock has mutex_lock() semantics here:
|
||||
*/
|
||||
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
|
||||
- local_bh_enable();
|
||||
}
|
||||
EXPORT_SYMBOL(lock_sock_nested);
|
||||
|
||||
@@ -3104,13 +3103,12 @@ bool lock_sock_fast(struct sock *sk) __a
|
||||
|
||||
__lock_sock(sk);
|
||||
sk->sk_lock.owned = 1;
|
||||
- spin_unlock(&sk->sk_lock.slock);
|
||||
+ spin_unlock_bh(&sk->sk_lock.slock);
|
||||
/*
|
||||
* The sk_lock has mutex_lock() semantics here:
|
||||
*/
|
||||
mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
|
||||
__acquire(&sk->sk_lock.slock);
|
||||
- local_bh_enable();
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(lock_sock_fast);
|
||||
@@ -0,0 +1,99 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 12 Oct 2020 17:33:54 +0200
|
||||
Subject: [PATCH] tcp: Remove superfluous BH-disable around listening_hash
|
||||
|
||||
Commit
|
||||
9652dc2eb9e40 ("tcp: relax listening_hash operations")
|
||||
|
||||
removed the need to disable bottom half while acquiring
|
||||
listening_hash.lock. There are still two callers left which disable
|
||||
bottom half before the lock is acquired.
|
||||
|
||||
Drop local_bh_disable() around __inet_hash() which acquires
|
||||
listening_hash->lock, invoke inet_ehash_nolisten() with disabled BH.
|
||||
inet_unhash() conditionally acquires listening_hash->lock.
|
||||
|
||||
Reported-by: Mike Galbraith <efault@gmx.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Link: https://lore.kernel.org/linux-rt-users/12d6f9879a97cd56c09fb53dee343cbb14f7f1f7.camel@gmx.de/
|
||||
Link: https://lkml.kernel.org/r/X9CheYjuXWc75Spa@hirez.programming.kicks-ass.net
|
||||
---
|
||||
net/ipv4/inet_hashtables.c | 19 ++++++++++++-------
|
||||
net/ipv6/inet6_hashtables.c | 5 +----
|
||||
2 files changed, 13 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/net/ipv4/inet_hashtables.c
|
||||
+++ b/net/ipv4/inet_hashtables.c
|
||||
@@ -635,7 +635,9 @@ int __inet_hash(struct sock *sk, struct
|
||||
int err = 0;
|
||||
|
||||
if (sk->sk_state != TCP_LISTEN) {
|
||||
+ local_bh_disable();
|
||||
inet_ehash_nolisten(sk, osk, NULL);
|
||||
+ local_bh_enable();
|
||||
return 0;
|
||||
}
|
||||
WARN_ON(!sk_unhashed(sk));
|
||||
@@ -667,11 +669,8 @@ int inet_hash(struct sock *sk)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
- if (sk->sk_state != TCP_CLOSE) {
|
||||
- local_bh_disable();
|
||||
+ if (sk->sk_state != TCP_CLOSE)
|
||||
err = __inet_hash(sk, NULL);
|
||||
- local_bh_enable();
|
||||
- }
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -682,17 +681,20 @@ void inet_unhash(struct sock *sk)
|
||||
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
|
||||
struct inet_listen_hashbucket *ilb = NULL;
|
||||
spinlock_t *lock;
|
||||
+ bool state_listen;
|
||||
|
||||
if (sk_unhashed(sk))
|
||||
return;
|
||||
|
||||
if (sk->sk_state == TCP_LISTEN) {
|
||||
+ state_listen = true;
|
||||
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
|
||||
- lock = &ilb->lock;
|
||||
+ spin_lock(&ilb->lock);
|
||||
} else {
|
||||
+ state_listen = false;
|
||||
lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
|
||||
+ spin_lock_bh(lock);
|
||||
}
|
||||
- spin_lock_bh(lock);
|
||||
if (sk_unhashed(sk))
|
||||
goto unlock;
|
||||
|
||||
@@ -705,7 +707,10 @@ void inet_unhash(struct sock *sk)
|
||||
__sk_nulls_del_node_init_rcu(sk);
|
||||
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
|
||||
unlock:
|
||||
- spin_unlock_bh(lock);
|
||||
+ if (state_listen)
|
||||
+ spin_unlock(&ilb->lock);
|
||||
+ else
|
||||
+ spin_unlock_bh(lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inet_unhash);
|
||||
|
||||
--- a/net/ipv6/inet6_hashtables.c
|
||||
+++ b/net/ipv6/inet6_hashtables.c
|
||||
@@ -333,11 +333,8 @@ int inet6_hash(struct sock *sk)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
- if (sk->sk_state != TCP_CLOSE) {
|
||||
- local_bh_disable();
|
||||
+ if (sk->sk_state != TCP_CLOSE)
|
||||
err = __inet_hash(sk, NULL);
|
||||
- local_bh_enable();
|
||||
- }
|
||||
|
||||
return err;
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 15 Feb 2021 18:44:12 +0100
|
||||
Subject: [PATCH] smp: Wake ksoftirqd on PREEMPT_RT instead do_softirq().
|
||||
|
||||
The softirq implementation on PREEMPT_RT does not provide do_softirq().
|
||||
The other user of do_softirq() is replaced with a local_bh_disable()
|
||||
+ enable() around the possible raise-softirq invocation. This can not be
|
||||
done here because migration_cpu_stop() is invoked with disabled
|
||||
preemption.
|
||||
|
||||
Wake the softirq thread on PREEMPT_RT if there are any pending softirqs.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/smp.c | 14 ++++++++++++--
|
||||
1 file changed, 12 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/smp.c
|
||||
+++ b/kernel/smp.c
|
||||
@@ -450,8 +450,18 @@ void flush_smp_call_function_from_idle(v
|
||||
|
||||
local_irq_save(flags);
|
||||
flush_smp_call_function_queue(true);
|
||||
- if (local_softirq_pending())
|
||||
- do_softirq();
|
||||
+
|
||||
+ if (local_softirq_pending()) {
|
||||
+
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
||||
+ do_softirq();
|
||||
+ } else {
|
||||
+ struct task_struct *ksoftirqd = this_cpu_ksoftirqd();
|
||||
+
|
||||
+ if (ksoftirqd && ksoftirqd->state != TASK_RUNNING)
|
||||
+ wake_up_process(ksoftirqd);
|
||||
+ }
|
||||
+ }
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:04 +0100
|
||||
Subject: [PATCH 01/20] tasklets: Replace barrier() with cpu_relax() in
|
||||
tasklet_unlock_wait()
|
||||
|
||||
A barrier() in a tight loop which waits for something to happen on a remote
|
||||
CPU is a pointless exercise. Replace it with cpu_relax() which allows HT
|
||||
siblings to make progress.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -677,7 +677,8 @@ static inline void tasklet_unlock(struct
|
||||
|
||||
static inline void tasklet_unlock_wait(struct tasklet_struct *t)
|
||||
{
|
||||
- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
|
||||
+ while (test_bit(TASKLET_STATE_RUN, &t->state))
|
||||
+ cpu_relax();
|
||||
}
|
||||
#else
|
||||
#define tasklet_trylock(t) 1
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:05 +0100
|
||||
Subject: [PATCH 02/20] tasklets: Use static inlines for stub implementations
|
||||
|
||||
Inlines exist for a reason.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 6 +++---
|
||||
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -681,9 +681,9 @@ static inline void tasklet_unlock_wait(s
|
||||
cpu_relax();
|
||||
}
|
||||
#else
|
||||
-#define tasklet_trylock(t) 1
|
||||
-#define tasklet_unlock_wait(t) do { } while (0)
|
||||
-#define tasklet_unlock(t) do { } while (0)
|
||||
+static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; }
|
||||
+static inline void tasklet_unlock(struct tasklet_struct *t) { }
|
||||
+static inline void tasklet_unlock_wait(struct tasklet_struct *t) { }
|
||||
#endif
|
||||
|
||||
extern void __tasklet_schedule(struct tasklet_struct *t);
|
||||
@@ -0,0 +1,61 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:06 +0100
|
||||
Subject: [PATCH 03/20] tasklets: Provide tasklet_disable_in_atomic()
|
||||
|
||||
Replacing the spin wait loops in tasklet_unlock_wait() with
|
||||
wait_var_event() is not possible as a handful of tasklet_disable()
|
||||
invocations are happening in atomic context. All other invocations are in
|
||||
teardown paths which can sleep.
|
||||
|
||||
Provide tasklet_disable_in_atomic() and tasklet_unlock_spin_wait() to
|
||||
convert the few atomic use cases over, which allows to change
|
||||
tasklet_disable() and tasklet_unlock_wait() in a later step.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 22 ++++++++++++++++++++++
|
||||
1 file changed, 22 insertions(+)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -680,10 +680,21 @@ static inline void tasklet_unlock_wait(s
|
||||
while (test_bit(TASKLET_STATE_RUN, &t->state))
|
||||
cpu_relax();
|
||||
}
|
||||
+
|
||||
+/*
|
||||
+ * Do not use in new code. Waiting for tasklets from atomic contexts is
|
||||
+ * error prone and should be avoided.
|
||||
+ */
|
||||
+static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t)
|
||||
+{
|
||||
+ while (test_bit(TASKLET_STATE_RUN, &t->state))
|
||||
+ cpu_relax();
|
||||
+}
|
||||
#else
|
||||
static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; }
|
||||
static inline void tasklet_unlock(struct tasklet_struct *t) { }
|
||||
static inline void tasklet_unlock_wait(struct tasklet_struct *t) { }
|
||||
+static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t) { }
|
||||
#endif
|
||||
|
||||
extern void __tasklet_schedule(struct tasklet_struct *t);
|
||||
@@ -708,6 +719,17 @@ static inline void tasklet_disable_nosyn
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
+/*
|
||||
+ * Do not use in new code. Disabling tasklets from atomic contexts is
|
||||
+ * error prone and should be avoided.
|
||||
+ */
|
||||
+static inline void tasklet_disable_in_atomic(struct tasklet_struct *t)
|
||||
+{
|
||||
+ tasklet_disable_nosync(t);
|
||||
+ tasklet_unlock_spin_wait(t);
|
||||
+ smp_mb();
|
||||
+}
|
||||
+
|
||||
static inline void tasklet_disable(struct tasklet_struct *t)
|
||||
{
|
||||
tasklet_disable_nosync(t);
|
||||
@@ -0,0 +1,26 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:07 +0100
|
||||
Subject: [PATCH 04/20] tasklets: Use spin wait in tasklet_disable()
|
||||
temporarily
|
||||
|
||||
To ease the transition use spin waiting in tasklet_disable() until all
|
||||
usage sites from atomic context have been cleaned up.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -733,7 +733,8 @@ static inline void tasklet_disable_in_at
|
||||
static inline void tasklet_disable(struct tasklet_struct *t)
|
||||
{
|
||||
tasklet_disable_nosync(t);
|
||||
- tasklet_unlock_wait(t);
|
||||
+ /* Spin wait until all atomic users are converted */
|
||||
+ tasklet_unlock_spin_wait(t);
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Tue, 9 Mar 2021 09:42:08 +0100
|
||||
Subject: [PATCH 05/20] tasklets: Replace spin wait in tasklet_unlock_wait()
|
||||
|
||||
tasklet_unlock_wait() spin waits for TASKLET_STATE_RUN to be cleared. This
|
||||
is wasting CPU cycles in a tight loop which is especially painful in a
|
||||
guest when the CPU running the tasklet is scheduled out.
|
||||
|
||||
tasklet_unlock_wait() is invoked from tasklet_kill() which is used in
|
||||
teardown paths and not performance critical at all. Replace the spin wait
|
||||
with wait_var_event().
|
||||
|
||||
There are no users of tasklet_unlock_wait() which are invoked from atomic
|
||||
contexts. The usage in tasklet_disable() has been replaced temporarily with
|
||||
the spin waiting variant until the atomic users are fixed up and will be
|
||||
converted to the sleep wait variant later.
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 13 ++-----------
|
||||
kernel/softirq.c | 18 ++++++++++++++++++
|
||||
2 files changed, 20 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -669,17 +669,8 @@ static inline int tasklet_trylock(struct
|
||||
return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
|
||||
}
|
||||
|
||||
-static inline void tasklet_unlock(struct tasklet_struct *t)
|
||||
-{
|
||||
- smp_mb__before_atomic();
|
||||
- clear_bit(TASKLET_STATE_RUN, &(t)->state);
|
||||
-}
|
||||
-
|
||||
-static inline void tasklet_unlock_wait(struct tasklet_struct *t)
|
||||
-{
|
||||
- while (test_bit(TASKLET_STATE_RUN, &t->state))
|
||||
- cpu_relax();
|
||||
-}
|
||||
+void tasklet_unlock(struct tasklet_struct *t);
|
||||
+void tasklet_unlock_wait(struct tasklet_struct *t);
|
||||
|
||||
/*
|
||||
* Do not use in new code. Waiting for tasklets from atomic contexts is
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <linux/smpboot.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/irq.h>
|
||||
+#include <linux/wait_bit.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/irq.h>
|
||||
@@ -619,6 +620,23 @@ void tasklet_kill(struct tasklet_struct
|
||||
}
|
||||
EXPORT_SYMBOL(tasklet_kill);
|
||||
|
||||
+#ifdef CONFIG_SMP
|
||||
+void tasklet_unlock(struct tasklet_struct *t)
|
||||
+{
|
||||
+ smp_mb__before_atomic();
|
||||
+ clear_bit(TASKLET_STATE_RUN, &t->state);
|
||||
+ smp_mb__after_atomic();
|
||||
+ wake_up_var(&t->state);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(tasklet_unlock);
|
||||
+
|
||||
+void tasklet_unlock_wait(struct tasklet_struct *t)
|
||||
+{
|
||||
+ wait_var_event(&t->state, !test_bit(TASKLET_STATE_RUN, &t->state));
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(tasklet_unlock_wait);
|
||||
+#endif
|
||||
+
|
||||
void __init softirq_init(void)
|
||||
{
|
||||
int cpu;
|
||||
@@ -0,0 +1,67 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Tue, 9 Mar 2021 09:42:09 +0100
|
||||
Subject: [PATCH 06/20] tasklets: Replace spin wait in tasklet_kill()
|
||||
|
||||
tasklet_kill() spin waits for TASKLET_STATE_SCHED to be cleared invoking
|
||||
yield() from inside the loop. yield() is an ill defined mechanism and the
|
||||
result might still be wasting CPU cycles in a tight loop which is
|
||||
especially painful in a guest when the CPU running the tasklet is scheduled
|
||||
out.
|
||||
|
||||
tasklet_kill() is used in teardown paths and not performance critical at
|
||||
all. Replace the spin wait with wait_var_event().
|
||||
|
||||
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/softirq.c | 23 +++++++++++++++--------
|
||||
1 file changed, 15 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -530,6 +530,16 @@ void __tasklet_hi_schedule(struct taskle
|
||||
}
|
||||
EXPORT_SYMBOL(__tasklet_hi_schedule);
|
||||
|
||||
+static inline bool tasklet_clear_sched(struct tasklet_struct *t)
|
||||
+{
|
||||
+ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) {
|
||||
+ wake_up_var(&t->state);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static void tasklet_action_common(struct softirq_action *a,
|
||||
struct tasklet_head *tl_head,
|
||||
unsigned int softirq_nr)
|
||||
@@ -549,8 +559,7 @@ static void tasklet_action_common(struct
|
||||
|
||||
if (tasklet_trylock(t)) {
|
||||
if (!atomic_read(&t->count)) {
|
||||
- if (!test_and_clear_bit(TASKLET_STATE_SCHED,
|
||||
- &t->state))
|
||||
+ if (!tasklet_clear_sched(t))
|
||||
BUG();
|
||||
if (t->use_callback)
|
||||
t->callback(t);
|
||||
@@ -610,13 +619,11 @@ void tasklet_kill(struct tasklet_struct
|
||||
if (in_interrupt())
|
||||
pr_notice("Attempt to kill tasklet from interrupt\n");
|
||||
|
||||
- while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
|
||||
- do {
|
||||
- yield();
|
||||
- } while (test_bit(TASKLET_STATE_SCHED, &t->state));
|
||||
- }
|
||||
+ while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
|
||||
+ wait_var_event(&t->state, !test_bit(TASKLET_STATE_SCHED, &t->state));
|
||||
+
|
||||
tasklet_unlock_wait(t);
|
||||
- clear_bit(TASKLET_STATE_SCHED, &t->state);
|
||||
+ tasklet_clear_sched(t);
|
||||
}
|
||||
EXPORT_SYMBOL(tasklet_kill);
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:10 +0100
|
||||
Subject: [PATCH 07/20] tasklets: Prevent tasklet_unlock_spin_wait() deadlock
|
||||
on RT
|
||||
|
||||
tasklet_unlock_spin_wait() spin waits for the TASKLET_STATE_RUN bit in
|
||||
the tasklet state to be cleared. This works on !RT nicely because the
|
||||
corresponding execution can only happen on a different CPU.
|
||||
|
||||
On RT softirq processing is preemptible, therefore a task preempting the
|
||||
softirq processing thread can spin forever.
|
||||
|
||||
Prevent this by invoking local_bh_disable()/enable() inside the loop. In
|
||||
case that the softirq processing thread was preempted by the current task,
|
||||
current will block on the local lock which yields the CPU to the preempted
|
||||
softirq processing thread. If the tasklet is processed on a different CPU
|
||||
then the local_bh_disable()/enable() pair is just a waste of processor
|
||||
cycles.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 12 ++----------
|
||||
kernel/softirq.c | 28 +++++++++++++++++++++++++++-
|
||||
2 files changed, 29 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -663,7 +663,7 @@ enum
|
||||
TASKLET_STATE_RUN /* Tasklet is running (SMP only) */
|
||||
};
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
|
||||
static inline int tasklet_trylock(struct tasklet_struct *t)
|
||||
{
|
||||
return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state);
|
||||
@@ -671,16 +671,8 @@ static inline int tasklet_trylock(struct
|
||||
|
||||
void tasklet_unlock(struct tasklet_struct *t);
|
||||
void tasklet_unlock_wait(struct tasklet_struct *t);
|
||||
+void tasklet_unlock_spin_wait(struct tasklet_struct *t);
|
||||
|
||||
-/*
|
||||
- * Do not use in new code. Waiting for tasklets from atomic contexts is
|
||||
- * error prone and should be avoided.
|
||||
- */
|
||||
-static inline void tasklet_unlock_spin_wait(struct tasklet_struct *t)
|
||||
-{
|
||||
- while (test_bit(TASKLET_STATE_RUN, &t->state))
|
||||
- cpu_relax();
|
||||
-}
|
||||
#else
|
||||
static inline int tasklet_trylock(struct tasklet_struct *t) { return 1; }
|
||||
static inline void tasklet_unlock(struct tasklet_struct *t) { }
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -614,6 +614,32 @@ void tasklet_init(struct tasklet_struct
|
||||
}
|
||||
EXPORT_SYMBOL(tasklet_init);
|
||||
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
|
||||
+/*
|
||||
+ * Do not use in new code. Waiting for tasklets from atomic contexts is
|
||||
+ * error prone and should be avoided.
|
||||
+ */
|
||||
+void tasklet_unlock_spin_wait(struct tasklet_struct *t)
|
||||
+{
|
||||
+ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
|
||||
+ /*
|
||||
+ * Prevent a live lock when current preempted soft
|
||||
+ * interrupt processing or prevents ksoftirqd from
|
||||
+ * running. If the tasklet runs on a different CPU
|
||||
+ * then this has no effect other than doing the BH
|
||||
+ * disable/enable dance for nothing.
|
||||
+ */
|
||||
+ local_bh_disable();
|
||||
+ local_bh_enable();
|
||||
+ } else {
|
||||
+ cpu_relax();
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(tasklet_unlock_spin_wait);
|
||||
+#endif
|
||||
+
|
||||
void tasklet_kill(struct tasklet_struct *t)
|
||||
{
|
||||
if (in_interrupt())
|
||||
@@ -627,7 +653,7 @@ void tasklet_kill(struct tasklet_struct
|
||||
}
|
||||
EXPORT_SYMBOL(tasklet_kill);
|
||||
|
||||
-#ifdef CONFIG_SMP
|
||||
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
|
||||
void tasklet_unlock(struct tasklet_struct *t)
|
||||
{
|
||||
smp_mb__before_atomic();
|
||||
@@ -0,0 +1,79 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:11 +0100
|
||||
Subject: [PATCH 08/20] net: jme: Replace link-change tasklet with work
|
||||
|
||||
The link change tasklet disables the tasklets for tx/rx processing while
|
||||
updating hw parameters and then enables the tasklets again.
|
||||
|
||||
This update can also be pushed into a workqueue where it can be performed
|
||||
in preemptible context. This allows tasklet_disable() to become sleeping.
|
||||
|
||||
Replace the linkch_task tasklet with a work.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/net/ethernet/jme.c | 10 +++++-----
|
||||
drivers/net/ethernet/jme.h | 2 +-
|
||||
2 files changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/jme.c
|
||||
+++ b/drivers/net/ethernet/jme.c
|
||||
@@ -1265,9 +1265,9 @@ jme_stop_shutdown_timer(struct jme_adapt
|
||||
jwrite32f(jme, JME_APMC, apmc);
|
||||
}
|
||||
|
||||
-static void jme_link_change_tasklet(struct tasklet_struct *t)
|
||||
+static void jme_link_change_work(struct work_struct *work)
|
||||
{
|
||||
- struct jme_adapter *jme = from_tasklet(jme, t, linkch_task);
|
||||
+ struct jme_adapter *jme = container_of(work, struct jme_adapter, linkch_task);
|
||||
struct net_device *netdev = jme->dev;
|
||||
int rc;
|
||||
|
||||
@@ -1510,7 +1510,7 @@ jme_intr_msi(struct jme_adapter *jme, u3
|
||||
* all other events are ignored
|
||||
*/
|
||||
jwrite32(jme, JME_IEVE, intrstat);
|
||||
- tasklet_schedule(&jme->linkch_task);
|
||||
+ schedule_work(&jme->linkch_task);
|
||||
goto out_reenable;
|
||||
}
|
||||
|
||||
@@ -1832,7 +1832,6 @@ jme_open(struct net_device *netdev)
|
||||
jme_clear_pm_disable_wol(jme);
|
||||
JME_NAPI_ENABLE(jme);
|
||||
|
||||
- tasklet_setup(&jme->linkch_task, jme_link_change_tasklet);
|
||||
tasklet_setup(&jme->txclean_task, jme_tx_clean_tasklet);
|
||||
tasklet_setup(&jme->rxclean_task, jme_rx_clean_tasklet);
|
||||
tasklet_setup(&jme->rxempty_task, jme_rx_empty_tasklet);
|
||||
@@ -1920,7 +1919,7 @@ jme_close(struct net_device *netdev)
|
||||
|
||||
JME_NAPI_DISABLE(jme);
|
||||
|
||||
- tasklet_kill(&jme->linkch_task);
|
||||
+ cancel_work_sync(&jme->linkch_task);
|
||||
tasklet_kill(&jme->txclean_task);
|
||||
tasklet_kill(&jme->rxclean_task);
|
||||
tasklet_kill(&jme->rxempty_task);
|
||||
@@ -3035,6 +3034,7 @@ jme_init_one(struct pci_dev *pdev,
|
||||
atomic_set(&jme->rx_empty, 1);
|
||||
|
||||
tasklet_setup(&jme->pcc_task, jme_pcc_tasklet);
|
||||
+ INIT_WORK(&jme->linkch_task, jme_link_change_work);
|
||||
jme->dpi.cur = PCC_P1;
|
||||
|
||||
jme->reg_ghc = 0;
|
||||
--- a/drivers/net/ethernet/jme.h
|
||||
+++ b/drivers/net/ethernet/jme.h
|
||||
@@ -411,7 +411,7 @@ struct jme_adapter {
|
||||
struct tasklet_struct rxempty_task;
|
||||
struct tasklet_struct rxclean_task;
|
||||
struct tasklet_struct txclean_task;
|
||||
- struct tasklet_struct linkch_task;
|
||||
+ struct work_struct linkch_task;
|
||||
struct tasklet_struct pcc_task;
|
||||
unsigned long flags;
|
||||
u32 reg_txcs;
|
||||
@@ -0,0 +1,32 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:12 +0100
|
||||
Subject: [PATCH 09/20] net: sundance: Use tasklet_disable_in_atomic().
|
||||
|
||||
tasklet_disable() is used in the timer callback. This might be disentangled,
|
||||
but without access to the hardware that's a bit risky.
|
||||
|
||||
Replace it with tasklet_disable_in_atomic() so tasklet_disable() can be
|
||||
changed to a sleep wait once all remaining atomic users are converted.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Denis Kirjanov <kda@linux-powerpc.org>
|
||||
Cc: "David S. Miller" <davem@davemloft.net>
|
||||
Cc: Jakub Kicinski <kuba@kernel.org>
|
||||
Cc: netdev@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/net/ethernet/dlink/sundance.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/ethernet/dlink/sundance.c
|
||||
+++ b/drivers/net/ethernet/dlink/sundance.c
|
||||
@@ -963,7 +963,7 @@ static void tx_timeout(struct net_device
|
||||
unsigned long flag;
|
||||
|
||||
netif_stop_queue(dev);
|
||||
- tasklet_disable(&np->tx_tasklet);
|
||||
+ tasklet_disable_in_atomic(&np->tx_tasklet);
|
||||
iowrite16(0, ioaddr + IntrEnable);
|
||||
printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x "
|
||||
"TxFrameId %2.2x,"
|
||||
@@ -0,0 +1,41 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:13 +0100
|
||||
Subject: [PATCH 10/20] ath9k: Use tasklet_disable_in_atomic()
|
||||
|
||||
All callers of ath9k_beacon_ensure_primary_slot() are preemptible /
|
||||
acquire a mutex except for this callchain:
|
||||
|
||||
spin_lock_bh(&sc->sc_pcu_lock);
|
||||
ath_complete_reset()
|
||||
-> ath9k_calculate_summary_state()
|
||||
-> ath9k_beacon_ensure_primary_slot()
|
||||
|
||||
It's unclear how that can be disentangled, so use tasklet_disable_in_atomic()
|
||||
for now. This allows tasklet_disable() to become sleepable once the
|
||||
remaining atomic users are cleaned up.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: ath9k-devel@qca.qualcomm.com
|
||||
Cc: Kalle Valo <kvalo@codeaurora.org>
|
||||
Cc: "David S. Miller" <davem@davemloft.net>
|
||||
Cc: Jakub Kicinski <kuba@kernel.org>
|
||||
Cc: linux-wireless@vger.kernel.org
|
||||
Cc: netdev@vger.kernel.org
|
||||
Acked-by: Kalle Valo <kvalo@codeaurora.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/net/wireless/ath/ath9k/beacon.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/net/wireless/ath/ath9k/beacon.c
|
||||
+++ b/drivers/net/wireless/ath/ath9k/beacon.c
|
||||
@@ -251,7 +251,7 @@ void ath9k_beacon_ensure_primary_slot(st
|
||||
int first_slot = ATH_BCBUF;
|
||||
int slot;
|
||||
|
||||
- tasklet_disable(&sc->bcon_tasklet);
|
||||
+ tasklet_disable_in_atomic(&sc->bcon_tasklet);
|
||||
|
||||
/* Find first taken slot. */
|
||||
for (slot = 0; slot < ATH_BCBUF; slot++) {
|
||||
@@ -0,0 +1,35 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:14 +0100
|
||||
Subject: [PATCH 11/20] atm: eni: Use tasklet_disable_in_atomic() in the send()
|
||||
callback
|
||||
|
||||
The atmdev_ops::send callback which calls tasklet_disable() is invoked with
|
||||
bottom halves disabled from net_device_ops::ndo_start_xmit(). All other
|
||||
invocations of tasklet_disable() in this driver happen in preemptible
|
||||
context.
|
||||
|
||||
Change the send() call to use tasklet_disable_in_atomic() which allows
|
||||
tasklet_disable() to be made sleepable once the remaining atomic context
|
||||
usage sites are cleaned up.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Chas Williams <3chas3@gmail.com>
|
||||
Cc: linux-atm-general@lists.sourceforge.net
|
||||
Cc: netdev@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/atm/eni.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/atm/eni.c
|
||||
+++ b/drivers/atm/eni.c
|
||||
@@ -2054,7 +2054,7 @@ static int eni_send(struct atm_vcc *vcc,
|
||||
}
|
||||
submitted++;
|
||||
ATM_SKB(skb)->vcc = vcc;
|
||||
- tasklet_disable(&ENI_DEV(vcc->dev)->task);
|
||||
+ tasklet_disable_in_atomic(&ENI_DEV(vcc->dev)->task);
|
||||
res = do_tx(skb);
|
||||
tasklet_enable(&ENI_DEV(vcc->dev)->task);
|
||||
if (res == enq_ok) return 0;
|
||||
@@ -0,0 +1,39 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:15 +0100
|
||||
Subject: [PATCH 12/20] PCI: hv: Use tasklet_disable_in_atomic()
|
||||
|
||||
The hv_compose_msi_msg() callback in irq_chip::irq_compose_msi_msg is
|
||||
invoked via irq_chip_compose_msi_msg(), which itself is always invoked from
|
||||
atomic contexts from the guts of the interrupt core code.
|
||||
|
||||
There is no way to change this w/o rewriting the whole driver, so use
|
||||
tasklet_disable_in_atomic() which allows tasklet_disable() to be made
|
||||
sleepable once the remaining atomic users are addressed.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
|
||||
Cc: Haiyang Zhang <haiyangz@microsoft.com>
|
||||
Cc: Stephen Hemminger <sthemmin@microsoft.com>
|
||||
Cc: Wei Liu <wei.liu@kernel.org>
|
||||
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
|
||||
Cc: Rob Herring <robh@kernel.org>
|
||||
Cc: Bjorn Helgaas <bhelgaas@google.com>
|
||||
Cc: linux-hyperv@vger.kernel.org
|
||||
Cc: linux-pci@vger.kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/pci/controller/pci-hyperv.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/drivers/pci/controller/pci-hyperv.c
|
||||
+++ b/drivers/pci/controller/pci-hyperv.c
|
||||
@@ -1458,7 +1458,7 @@ static void hv_compose_msi_msg(struct ir
|
||||
* Prevents hv_pci_onchannelcallback() from running concurrently
|
||||
* in the tasklet.
|
||||
*/
|
||||
- tasklet_disable(&channel->callback_event);
|
||||
+ tasklet_disable_in_atomic(&channel->callback_event);
|
||||
|
||||
/*
|
||||
* Since this function is called with IRQ locks held, can't
|
||||
@@ -0,0 +1,54 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:16 +0100
|
||||
Subject: [PATCH 13/20] firewire: ohci: Use tasklet_disable_in_atomic() where
|
||||
required
|
||||
|
||||
tasklet_disable() is invoked in several places. Some of them are in atomic
|
||||
context which prevents a conversion of tasklet_disable() to a sleepable
|
||||
function.
|
||||
|
||||
The atomic callchains are:
|
||||
|
||||
ar_context_tasklet()
|
||||
ohci_cancel_packet()
|
||||
tasklet_disable()
|
||||
|
||||
...
|
||||
ohci_flush_iso_completions()
|
||||
tasklet_disable()
|
||||
|
||||
The invocation of tasklet_disable() from at_context_flush() is always in
|
||||
preemptible context.
|
||||
|
||||
Use tasklet_disable_in_atomic() for the two invocations in
|
||||
ohci_cancel_packet() and ohci_flush_iso_completions().
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
|
||||
Cc: linux1394-devel@lists.sourceforge.net
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/firewire/ohci.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/firewire/ohci.c
|
||||
+++ b/drivers/firewire/ohci.c
|
||||
@@ -2545,7 +2545,7 @@ static int ohci_cancel_packet(struct fw_
|
||||
struct driver_data *driver_data = packet->driver_data;
|
||||
int ret = -ENOENT;
|
||||
|
||||
- tasklet_disable(&ctx->tasklet);
|
||||
+ tasklet_disable_in_atomic(&ctx->tasklet);
|
||||
|
||||
if (packet->ack != 0)
|
||||
goto out;
|
||||
@@ -3465,7 +3465,7 @@ static int ohci_flush_iso_completions(st
|
||||
struct iso_context *ctx = container_of(base, struct iso_context, base);
|
||||
int ret = 0;
|
||||
|
||||
- tasklet_disable(&ctx->context.tasklet);
|
||||
+ tasklet_disable_in_atomic(&ctx->context.tasklet);
|
||||
|
||||
if (!test_and_set_bit_lock(0, &ctx->flushing_completions)) {
|
||||
context_tasklet((unsigned long)&ctx->context);
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:42:17 +0100
|
||||
Subject: [PATCH 14/20] tasklets: Switch tasklet_disable() to the sleep wait
|
||||
variant
|
||||
|
||||
-- NOT FOR IMMEDIATE MERGING --
|
||||
|
||||
Now that all users of tasklet_disable() are invoked from sleepable context,
|
||||
convert it to use tasklet_unlock_wait() which might sleep.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -716,8 +716,7 @@ static inline void tasklet_disable_in_at
|
||||
static inline void tasklet_disable(struct tasklet_struct *t)
|
||||
{
|
||||
tasklet_disable_nosync(t);
|
||||
- /* Spin wait until all atomic users are converted */
|
||||
- tasklet_unlock_spin_wait(t);
|
||||
+ tasklet_unlock_wait(t);
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:53 +0100
|
||||
Subject: [PATCH 15/20] softirq: Add RT specific softirq accounting
|
||||
|
||||
RT requires the softirq processing and local bottomhalf disabled regions to
|
||||
be preemptible. Using the normal preempt count based serialization is
|
||||
therefore not possible because this implicitly disables preemption.
|
||||
|
||||
RT kernels use a per CPU local lock to serialize bottom halves. As
|
||||
local_bh_disable() can nest the lock can only be acquired on the outermost
|
||||
invocation of local_bh_disable() and released when the nest count becomes
|
||||
zero. Tasks which hold the local lock can be preempted so it's required to
|
||||
keep track of the nest count per task.
|
||||
|
||||
Add a RT only counter to task struct and adjust the relevant macros in
|
||||
preempt.h.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/hardirq.h | 1 +
|
||||
include/linux/preempt.h | 6 +++++-
|
||||
include/linux/sched.h | 3 +++
|
||||
3 files changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/hardirq.h
|
||||
+++ b/include/linux/hardirq.h
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <linux/preempt.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/ftrace_irq.h>
|
||||
+#include <linux/sched.h>
|
||||
#include <linux/vtime.h>
|
||||
#include <asm/hardirq.h>
|
||||
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -79,7 +79,11 @@
|
||||
|
||||
#define nmi_count() (preempt_count() & NMI_MASK)
|
||||
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
|
||||
-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+# define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK)
|
||||
+#else
|
||||
+# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
|
||||
+#endif
|
||||
#define irq_count() (nmi_count() | hardirq_count() | softirq_count())
|
||||
|
||||
/*
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -1040,6 +1040,9 @@ struct task_struct {
|
||||
int softirq_context;
|
||||
int irq_config;
|
||||
#endif
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ int softirq_disable_cnt;
|
||||
+#endif
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
# define MAX_LOCK_DEPTH 48UL
|
||||
@@ -0,0 +1,47 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:54 +0100
|
||||
Subject: [PATCH 16/20] irqtime: Make accounting correct on RT
|
||||
|
||||
vtime_account_irq and irqtime_account_irq() base checks on preempt_count()
|
||||
which fails on RT because preempt_count() does not contain the softirq
|
||||
accounting which is separate on RT.
|
||||
|
||||
These checks do not need the full preempt count as they only operate on the
|
||||
hard and softirq sections.
|
||||
|
||||
Use irq_count() instead which provides the correct value on both RT and non
|
||||
RT kernels. The compiler is clever enough to fold the masking for !RT:
|
||||
|
||||
99b: 65 8b 05 00 00 00 00 mov %gs:0x0(%rip),%eax
|
||||
- 9a2: 25 ff ff ff 7f and $0x7fffffff,%eax
|
||||
+ 9a2: 25 00 ff ff 00 and $0xffff00,%eax
|
||||
|
||||
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/sched/cputime.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/kernel/sched/cputime.c
|
||||
+++ b/kernel/sched/cputime.c
|
||||
@@ -60,7 +60,7 @@ void irqtime_account_irq(struct task_str
|
||||
cpu = smp_processor_id();
|
||||
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
|
||||
irqtime->irq_start_time += delta;
|
||||
- pc = preempt_count() - offset;
|
||||
+ pc = irq_count() - offset;
|
||||
|
||||
/*
|
||||
* We do not account for softirq time from ksoftirqd here.
|
||||
@@ -421,7 +421,7 @@ void vtime_task_switch(struct task_struc
|
||||
|
||||
void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
|
||||
{
|
||||
- unsigned int pc = preempt_count() - offset;
|
||||
+ unsigned int pc = irq_count() - offset;
|
||||
|
||||
if (pc & HARDIRQ_OFFSET) {
|
||||
vtime_account_hardirq(tsk);
|
||||
@@ -0,0 +1,101 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:55 +0100
|
||||
Subject: [PATCH 17/20] softirq: Move various protections into inline helpers
|
||||
|
||||
To allow reuse of the bulk of softirq processing code for RT and to avoid
|
||||
#ifdeffery all over the place, split protections for various code sections
|
||||
out into inline helpers so the RT variant can just replace them in one go.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/softirq.c | 39 ++++++++++++++++++++++++++++++++-------
|
||||
1 file changed, 32 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -205,6 +205,32 @@ void __local_bh_enable_ip(unsigned long
|
||||
}
|
||||
EXPORT_SYMBOL(__local_bh_enable_ip);
|
||||
|
||||
+static inline void softirq_handle_begin(void)
|
||||
+{
|
||||
+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
|
||||
+}
|
||||
+
|
||||
+static inline void softirq_handle_end(void)
|
||||
+{
|
||||
+ __local_bh_enable(SOFTIRQ_OFFSET);
|
||||
+ WARN_ON_ONCE(in_interrupt());
|
||||
+}
|
||||
+
|
||||
+static inline void ksoftirqd_run_begin(void)
|
||||
+{
|
||||
+ local_irq_disable();
|
||||
+}
|
||||
+
|
||||
+static inline void ksoftirqd_run_end(void)
|
||||
+{
|
||||
+ local_irq_enable();
|
||||
+}
|
||||
+
|
||||
+static inline bool should_wake_ksoftirqd(void)
|
||||
+{
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static inline void invoke_softirq(void)
|
||||
{
|
||||
if (ksoftirqd_running(local_softirq_pending()))
|
||||
@@ -317,7 +343,7 @@ asmlinkage __visible void __softirq_entr
|
||||
|
||||
pending = local_softirq_pending();
|
||||
|
||||
- __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
|
||||
+ softirq_handle_begin();
|
||||
in_hardirq = lockdep_softirq_start();
|
||||
account_softirq_enter(current);
|
||||
|
||||
@@ -368,8 +394,7 @@ asmlinkage __visible void __softirq_entr
|
||||
|
||||
account_softirq_exit(current);
|
||||
lockdep_softirq_end(in_hardirq);
|
||||
- __local_bh_enable(SOFTIRQ_OFFSET);
|
||||
- WARN_ON_ONCE(in_interrupt());
|
||||
+ softirq_handle_end();
|
||||
current_restore_flags(old_flags, PF_MEMALLOC);
|
||||
}
|
||||
|
||||
@@ -464,7 +489,7 @@ inline void raise_softirq_irqoff(unsigne
|
||||
* Otherwise we wake up ksoftirqd to make sure we
|
||||
* schedule the softirq soon.
|
||||
*/
|
||||
- if (!in_interrupt())
|
||||
+ if (!in_interrupt() && should_wake_ksoftirqd())
|
||||
wakeup_softirqd();
|
||||
}
|
||||
|
||||
@@ -692,18 +717,18 @@ static int ksoftirqd_should_run(unsigned
|
||||
|
||||
static void run_ksoftirqd(unsigned int cpu)
|
||||
{
|
||||
- local_irq_disable();
|
||||
+ ksoftirqd_run_begin();
|
||||
if (local_softirq_pending()) {
|
||||
/*
|
||||
* We can safely run softirq on inline stack, as we are not deep
|
||||
* in the task stack here.
|
||||
*/
|
||||
__do_softirq();
|
||||
- local_irq_enable();
|
||||
+ ksoftirqd_run_end();
|
||||
cond_resched();
|
||||
return;
|
||||
}
|
||||
- local_irq_enable();
|
||||
+ ksoftirqd_run_end();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
@@ -0,0 +1,258 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:56 +0100
|
||||
Subject: [PATCH 18/20] softirq: Make softirq control and processing RT aware
|
||||
|
||||
Provide a local lock based serialization for soft interrupts on RT which
|
||||
allows the local_bh_disabled() sections and servicing soft interrupts to be
|
||||
preemptible.
|
||||
|
||||
Provide the necessary inline helpers which allow reusing the bulk of the
|
||||
softirq processing code.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/bottom_half.h | 2
|
||||
kernel/softirq.c | 188 ++++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 182 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/include/linux/bottom_half.h
|
||||
+++ b/include/linux/bottom_half.h
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#include <linux/preempt.h>
|
||||
|
||||
-#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
|
||||
extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt);
|
||||
#else
|
||||
static __always_inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/init.h>
|
||||
+#include <linux/local_lock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/percpu.h>
|
||||
@@ -101,20 +102,189 @@ EXPORT_PER_CPU_SYMBOL_GPL(hardirq_contex
|
||||
#endif
|
||||
|
||||
/*
|
||||
- * preempt_count and SOFTIRQ_OFFSET usage:
|
||||
- * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
|
||||
- * softirq processing.
|
||||
- * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
|
||||
+ * SOFTIRQ_OFFSET usage:
|
||||
+ *
|
||||
+ * On !RT kernels 'count' is the preempt counter, on RT kernels this applies
|
||||
+ * to a per CPU counter and to task::softirqs_disabled_cnt.
|
||||
+ *
|
||||
+ * - count is changed by SOFTIRQ_OFFSET on entering or leaving softirq
|
||||
+ * processing.
|
||||
+ *
|
||||
+ * - count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
|
||||
* on local_bh_disable or local_bh_enable.
|
||||
+ *
|
||||
* This lets us distinguish between whether we are currently processing
|
||||
* softirq and whether we just have bh disabled.
|
||||
*/
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+
|
||||
+/*
|
||||
+ * RT accounts for BH disabled sections in task::softirqs_disabled_cnt and
|
||||
+ * also in per CPU softirq_ctrl::cnt. This is necessary to allow tasks in a
|
||||
+ * softirq disabled section to be preempted.
|
||||
+ *
|
||||
+ * The per task counter is used for softirq_count(), in_softirq() and
|
||||
+ * in_serving_softirqs() because these counts are only valid when the task
|
||||
+ * holding softirq_ctrl::lock is running.
|
||||
+ *
|
||||
+ * The per CPU counter prevents pointless wakeups of ksoftirqd in case that
|
||||
+ * the task which is in a softirq disabled section is preempted or blocks.
|
||||
+ */
|
||||
+struct softirq_ctrl {
|
||||
+ local_lock_t lock;
|
||||
+ int cnt;
|
||||
+};
|
||||
+
|
||||
+static DEFINE_PER_CPU(struct softirq_ctrl, softirq_ctrl) = {
|
||||
+ .lock = INIT_LOCAL_LOCK(softirq_ctrl.lock),
|
||||
+};
|
||||
+
|
||||
+void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+ int newcnt;
|
||||
+
|
||||
+ WARN_ON_ONCE(in_hardirq());
|
||||
+
|
||||
+ /* First entry of a task into a BH disabled section? */
|
||||
+ if (!current->softirq_disable_cnt) {
|
||||
+ if (preemptible()) {
|
||||
+ local_lock(&softirq_ctrl.lock);
|
||||
+ /* Required to meet the RCU bottomhalf requirements. */
|
||||
+ rcu_read_lock();
|
||||
+ } else {
|
||||
+ DEBUG_LOCKS_WARN_ON(this_cpu_read(softirq_ctrl.cnt));
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Track the per CPU softirq disabled state. On RT this is per CPU
|
||||
+ * state to allow preemption of bottom half disabled sections.
|
||||
+ */
|
||||
+ newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
|
||||
+ /*
|
||||
+ * Reflect the result in the task state to prevent recursion on the
|
||||
+ * local lock and to make softirq_count() & al work.
|
||||
+ */
|
||||
+ current->softirq_disable_cnt = newcnt;
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
|
||||
+ raw_local_irq_save(flags);
|
||||
+ lockdep_softirqs_off(ip);
|
||||
+ raw_local_irq_restore(flags);
|
||||
+ }
|
||||
+}
|
||||
+EXPORT_SYMBOL(__local_bh_disable_ip);
|
||||
+
|
||||
+static void __local_bh_enable(unsigned int cnt, bool unlock)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+ int newcnt;
|
||||
+
|
||||
+ DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
|
||||
+ this_cpu_read(softirq_ctrl.cnt));
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
|
||||
+ raw_local_irq_save(flags);
|
||||
+ lockdep_softirqs_on(_RET_IP_);
|
||||
+ raw_local_irq_restore(flags);
|
||||
+ }
|
||||
+
|
||||
+ newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
|
||||
+ current->softirq_disable_cnt = newcnt;
|
||||
+
|
||||
+ if (!newcnt && unlock) {
|
||||
+ rcu_read_unlock();
|
||||
+ local_unlock(&softirq_ctrl.lock);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
|
||||
+{
|
||||
+ bool preempt_on = preemptible();
|
||||
+ unsigned long flags;
|
||||
+ u32 pending;
|
||||
+ int curcnt;
|
||||
+
|
||||
+ WARN_ON_ONCE(in_irq());
|
||||
+ lockdep_assert_irqs_enabled();
|
||||
+
|
||||
+ local_irq_save(flags);
|
||||
+ curcnt = __this_cpu_read(softirq_ctrl.cnt);
|
||||
+
|
||||
+ /*
|
||||
+ * If this is not reenabling soft interrupts, no point in trying to
|
||||
+ * run pending ones.
|
||||
+ */
|
||||
+ if (curcnt != cnt)
|
||||
+ goto out;
|
||||
+
|
||||
+ pending = local_softirq_pending();
|
||||
+ if (!pending || ksoftirqd_running(pending))
|
||||
+ goto out;
|
||||
+
|
||||
+ /*
|
||||
+ * If this was called from non preemptible context, wake up the
|
||||
+ * softirq daemon.
|
||||
+ */
|
||||
+ if (!preempt_on) {
|
||||
+ wakeup_softirqd();
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Adjust softirq count to SOFTIRQ_OFFSET which makes
|
||||
+ * in_serving_softirq() become true.
|
||||
+ */
|
||||
+ cnt = SOFTIRQ_OFFSET;
|
||||
+ __local_bh_enable(cnt, false);
|
||||
+ __do_softirq();
|
||||
+
|
||||
+out:
|
||||
+ __local_bh_enable(cnt, preempt_on);
|
||||
+ local_irq_restore(flags);
|
||||
+}
|
||||
+EXPORT_SYMBOL(__local_bh_enable_ip);
|
||||
+
|
||||
+/*
|
||||
+ * Invoked from ksoftirqd_run() outside of the interrupt disabled section
|
||||
+ * to acquire the per CPU local lock for reentrancy protection.
|
||||
+ */
|
||||
+static inline void ksoftirqd_run_begin(void)
|
||||
+{
|
||||
+ __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
|
||||
+ local_irq_disable();
|
||||
+}
|
||||
+
|
||||
+/* Counterpart to ksoftirqd_run_begin() */
|
||||
+static inline void ksoftirqd_run_end(void)
|
||||
+{
|
||||
+ __local_bh_enable(SOFTIRQ_OFFSET, true);
|
||||
+ WARN_ON_ONCE(in_interrupt());
|
||||
+ local_irq_enable();
|
||||
+}
|
||||
+
|
||||
+static inline void softirq_handle_begin(void) { }
|
||||
+static inline void softirq_handle_end(void) { }
|
||||
+
|
||||
+static inline bool should_wake_ksoftirqd(void)
|
||||
+{
|
||||
+ return !this_cpu_read(softirq_ctrl.cnt);
|
||||
+}
|
||||
+
|
||||
+static inline void invoke_softirq(void)
|
||||
+{
|
||||
+ if (should_wake_ksoftirqd())
|
||||
+ wakeup_softirqd();
|
||||
+}
|
||||
+
|
||||
+#else /* CONFIG_PREEMPT_RT */
|
||||
|
||||
-#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
/*
|
||||
- * This is for softirq.c-internal use, where hardirqs are disabled
|
||||
+ * This one is for softirq.c-internal use, where hardirqs are disabled
|
||||
* legitimately:
|
||||
*/
|
||||
+#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -275,6 +445,8 @@ asmlinkage __visible void do_softirq(voi
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
+#endif /* !CONFIG_PREEMPT_RT */
|
||||
+
|
||||
/*
|
||||
* We restart softirq processing for at most MAX_SOFTIRQ_RESTART times,
|
||||
* but break the loop if need_resched() is set or after 2 ms.
|
||||
@@ -379,8 +551,10 @@ asmlinkage __visible void __softirq_entr
|
||||
pending >>= softirq_bit;
|
||||
}
|
||||
|
||||
- if (__this_cpu_read(ksoftirqd) == current)
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
|
||||
+ __this_cpu_read(ksoftirqd) == current)
|
||||
rcu_softirq_qs();
|
||||
+
|
||||
local_irq_disable();
|
||||
|
||||
pending = local_softirq_pending();
|
||||
@@ -0,0 +1,73 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:57 +0100
|
||||
Subject: [PATCH 19/20] tick/sched: Prevent false positive softirq pending
|
||||
warnings on RT
|
||||
|
||||
On RT a task which has soft interrupts disabled can block on a lock and
|
||||
schedule out to idle while soft interrupts are pending. This triggers the
|
||||
warning in the NOHZ idle code which complains about going idle with pending
|
||||
soft interrupts. But as the task is blocked soft interrupt processing is
|
||||
temporarily blocked as well which means that such a warning is a false
|
||||
positive.
|
||||
|
||||
To prevent that, check the per CPU state which indicates that a scheduled
|
||||
out task has soft interrupts disabled.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/bottom_half.h | 6 ++++++
|
||||
kernel/softirq.c | 15 +++++++++++++++
|
||||
kernel/time/tick-sched.c | 2 +-
|
||||
3 files changed, 22 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/bottom_half.h
|
||||
+++ b/include/linux/bottom_half.h
|
||||
@@ -32,4 +32,10 @@ static inline void local_bh_enable(void)
|
||||
__local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+extern bool local_bh_blocked(void);
|
||||
+#else
|
||||
+static inline bool local_bh_blocked(void) { return false; }
|
||||
+#endif
|
||||
+
|
||||
#endif /* _LINUX_BH_H */
|
||||
--- a/kernel/softirq.c
|
||||
+++ b/kernel/softirq.c
|
||||
@@ -139,6 +139,21 @@ static DEFINE_PER_CPU(struct softirq_ctr
|
||||
.lock = INIT_LOCAL_LOCK(softirq_ctrl.lock),
|
||||
};
|
||||
|
||||
+/**
|
||||
+ * local_bh_blocked() - Check for idle whether BH processing is blocked
|
||||
+ *
|
||||
+ * Returns false if the per CPU softirq::cnt is 0 otherwise true.
|
||||
+ *
|
||||
+ * This is invoked from the idle task to guard against false positive
|
||||
+ * softirq pending warnings, which would happen when the task which holds
|
||||
+ * softirq_ctrl::lock was the only running task on the CPU and blocks on
|
||||
+ * some other lock.
|
||||
+ */
|
||||
+bool local_bh_blocked(void)
|
||||
+{
|
||||
+ return __this_cpu_read(softirq_ctrl.cnt) != 0;
|
||||
+}
|
||||
+
|
||||
void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
|
||||
{
|
||||
unsigned long flags;
|
||||
--- a/kernel/time/tick-sched.c
|
||||
+++ b/kernel/time/tick-sched.c
|
||||
@@ -973,7 +973,7 @@ static bool can_stop_idle_tick(int cpu,
|
||||
if (unlikely(local_softirq_pending())) {
|
||||
static int ratelimit;
|
||||
|
||||
- if (ratelimit < 10 &&
|
||||
+ if (ratelimit < 10 && !local_bh_blocked() &&
|
||||
(local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
|
||||
pr_warn("NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #%02x!!!\n",
|
||||
(unsigned int) local_softirq_pending());
|
||||
@@ -0,0 +1,28 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 9 Mar 2021 09:55:58 +0100
|
||||
Subject: [PATCH 20/20] rcu: Prevent false positive softirq warning on RT
|
||||
|
||||
Soft interrupt disabled sections can legitimately be preempted or schedule
|
||||
out when blocking on a lock on RT enabled kernels so the RCU preempt check
|
||||
warning has to be disabled for RT kernels.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rcupdate.h | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/rcupdate.h
|
||||
+++ b/include/linux/rcupdate.h
|
||||
@@ -328,7 +328,8 @@ static inline void rcu_preempt_sleep_che
|
||||
#define rcu_sleep_check() \
|
||||
do { \
|
||||
rcu_preempt_sleep_check(); \
|
||||
- RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) \
|
||||
+ RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \
|
||||
"Illegal context switch in RCU-bh read-side critical section"); \
|
||||
RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \
|
||||
"Illegal context switch in RCU-sched read-side critical section"); \
|
||||
@@ -0,0 +1,256 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 2 Feb 2021 18:01:03 +0100
|
||||
Subject: [PATCH 1/2] chelsio: cxgb: Replace the workqueue with threaded
|
||||
interrupt
|
||||
|
||||
The external interrupt (F_PL_INTR_EXT) needs to be handled in a process
|
||||
context and this is accomplished by utilizing a workqueue.
|
||||
|
||||
The process context can also be provided by a threaded interrupt instead
|
||||
of a workqueue. The threaded interrupt can be used later for other
|
||||
interrupt related processing which requires non-atomic context without
|
||||
using yet another workqueue. free_irq() also ensures that the thread is
|
||||
done which is currently missing (the worker could continue after the
|
||||
module has been removed).
|
||||
|
||||
Save pending flags in pending_thread_intr. Use the same mechanism
|
||||
to disable F_PL_INTR_EXT as interrupt source like it is used before the
|
||||
worker is scheduled. Enable the interrupt again once
|
||||
t1_elmer0_ext_intr_handler() is done.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/net/ethernet/chelsio/cxgb/common.h | 5 +--
|
||||
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 44 ++---------------------------
|
||||
drivers/net/ethernet/chelsio/cxgb/sge.c | 33 +++++++++++++++++++--
|
||||
drivers/net/ethernet/chelsio/cxgb/sge.h | 1
|
||||
drivers/net/ethernet/chelsio/cxgb/subr.c | 26 +++++++++++------
|
||||
5 files changed, 55 insertions(+), 54 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
|
||||
@@ -238,7 +238,6 @@ struct adapter {
|
||||
int msg_enable;
|
||||
u32 mmio_len;
|
||||
|
||||
- struct work_struct ext_intr_handler_task;
|
||||
struct adapter_params params;
|
||||
|
||||
/* Terminator modules. */
|
||||
@@ -257,6 +256,7 @@ struct adapter {
|
||||
|
||||
/* guards async operations */
|
||||
spinlock_t async_lock ____cacheline_aligned;
|
||||
+ u32 pending_thread_intr;
|
||||
u32 slow_intr_mask;
|
||||
int t1powersave;
|
||||
};
|
||||
@@ -334,8 +334,7 @@ void t1_interrupts_enable(adapter_t *ada
|
||||
void t1_interrupts_disable(adapter_t *adapter);
|
||||
void t1_interrupts_clear(adapter_t *adapter);
|
||||
int t1_elmer0_ext_intr_handler(adapter_t *adapter);
|
||||
-void t1_elmer0_ext_intr(adapter_t *adapter);
|
||||
-int t1_slow_intr_handler(adapter_t *adapter);
|
||||
+irqreturn_t t1_slow_intr_handler(adapter_t *adapter);
|
||||
|
||||
int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc);
|
||||
const struct board_info *t1_get_board_info(unsigned int board_id);
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
|
||||
@@ -211,9 +211,10 @@ static int cxgb_up(struct adapter *adapt
|
||||
t1_interrupts_clear(adapter);
|
||||
|
||||
adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev);
|
||||
- err = request_irq(adapter->pdev->irq, t1_interrupt,
|
||||
- adapter->params.has_msi ? 0 : IRQF_SHARED,
|
||||
- adapter->name, adapter);
|
||||
+ err = request_threaded_irq(adapter->pdev->irq, t1_interrupt,
|
||||
+ t1_interrupt_thread,
|
||||
+ adapter->params.has_msi ? 0 : IRQF_SHARED,
|
||||
+ adapter->name, adapter);
|
||||
if (err) {
|
||||
if (adapter->params.has_msi)
|
||||
pci_disable_msi(adapter->pdev);
|
||||
@@ -916,41 +917,6 @@ static void mac_stats_task(struct work_s
|
||||
spin_unlock(&adapter->work_lock);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Processes elmer0 external interrupts in process context.
|
||||
- */
|
||||
-static void ext_intr_task(struct work_struct *work)
|
||||
-{
|
||||
- struct adapter *adapter =
|
||||
- container_of(work, struct adapter, ext_intr_handler_task);
|
||||
-
|
||||
- t1_elmer0_ext_intr_handler(adapter);
|
||||
-
|
||||
- /* Now reenable external interrupts */
|
||||
- spin_lock_irq(&adapter->async_lock);
|
||||
- adapter->slow_intr_mask |= F_PL_INTR_EXT;
|
||||
- writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE);
|
||||
- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
|
||||
- adapter->regs + A_PL_ENABLE);
|
||||
- spin_unlock_irq(&adapter->async_lock);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Interrupt-context handler for elmer0 external interrupts.
|
||||
- */
|
||||
-void t1_elmer0_ext_intr(struct adapter *adapter)
|
||||
-{
|
||||
- /*
|
||||
- * Schedule a task to handle external interrupts as we require
|
||||
- * a process context. We disable EXT interrupts in the interim
|
||||
- * and let the task reenable them when it's done.
|
||||
- */
|
||||
- adapter->slow_intr_mask &= ~F_PL_INTR_EXT;
|
||||
- writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
|
||||
- adapter->regs + A_PL_ENABLE);
|
||||
- schedule_work(&adapter->ext_intr_handler_task);
|
||||
-}
|
||||
-
|
||||
void t1_fatal_err(struct adapter *adapter)
|
||||
{
|
||||
if (adapter->flags & FULL_INIT_DONE) {
|
||||
@@ -1062,8 +1028,6 @@ static int init_one(struct pci_dev *pdev
|
||||
spin_lock_init(&adapter->async_lock);
|
||||
spin_lock_init(&adapter->mac_lock);
|
||||
|
||||
- INIT_WORK(&adapter->ext_intr_handler_task,
|
||||
- ext_intr_task);
|
||||
INIT_DELAYED_WORK(&adapter->stats_update_task,
|
||||
mac_stats_task);
|
||||
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
|
||||
@@ -1619,11 +1619,38 @@ int t1_poll(struct napi_struct *napi, in
|
||||
return work_done;
|
||||
}
|
||||
|
||||
+irqreturn_t t1_interrupt_thread(int irq, void *data)
|
||||
+{
|
||||
+ struct adapter *adapter = data;
|
||||
+ u32 pending_thread_intr;
|
||||
+
|
||||
+ spin_lock_irq(&adapter->async_lock);
|
||||
+ pending_thread_intr = adapter->pending_thread_intr;
|
||||
+ adapter->pending_thread_intr = 0;
|
||||
+ spin_unlock_irq(&adapter->async_lock);
|
||||
+
|
||||
+ if (!pending_thread_intr)
|
||||
+ return IRQ_NONE;
|
||||
+
|
||||
+ if (pending_thread_intr & F_PL_INTR_EXT)
|
||||
+ t1_elmer0_ext_intr_handler(adapter);
|
||||
+
|
||||
+ spin_lock_irq(&adapter->async_lock);
|
||||
+ adapter->slow_intr_mask |= F_PL_INTR_EXT;
|
||||
+
|
||||
+ writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE);
|
||||
+ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
|
||||
+ adapter->regs + A_PL_ENABLE);
|
||||
+ spin_unlock_irq(&adapter->async_lock);
|
||||
+
|
||||
+ return IRQ_HANDLED;
|
||||
+}
|
||||
+
|
||||
irqreturn_t t1_interrupt(int irq, void *data)
|
||||
{
|
||||
struct adapter *adapter = data;
|
||||
struct sge *sge = adapter->sge;
|
||||
- int handled;
|
||||
+ irqreturn_t handled;
|
||||
|
||||
if (likely(responses_pending(adapter))) {
|
||||
writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
|
||||
@@ -1645,10 +1672,10 @@ irqreturn_t t1_interrupt(int irq, void *
|
||||
handled = t1_slow_intr_handler(adapter);
|
||||
spin_unlock(&adapter->async_lock);
|
||||
|
||||
- if (!handled)
|
||||
+ if (handled == IRQ_NONE)
|
||||
sge->stats.unhandled_irqs++;
|
||||
|
||||
- return IRQ_RETVAL(handled != 0);
|
||||
+ return handled;
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
|
||||
@@ -74,6 +74,7 @@ struct sge *t1_sge_create(struct adapter
|
||||
int t1_sge_configure(struct sge *, struct sge_params *);
|
||||
int t1_sge_set_coalesce_params(struct sge *, struct sge_params *);
|
||||
void t1_sge_destroy(struct sge *);
|
||||
+irqreturn_t t1_interrupt_thread(int irq, void *data);
|
||||
irqreturn_t t1_interrupt(int irq, void *cookie);
|
||||
int t1_poll(struct napi_struct *, int);
|
||||
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/subr.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/subr.c
|
||||
@@ -210,7 +210,7 @@ static int fpga_phy_intr_handler(adapter
|
||||
/*
|
||||
* Slow path interrupt handler for FPGAs.
|
||||
*/
|
||||
-static int fpga_slow_intr(adapter_t *adapter)
|
||||
+static irqreturn_t fpga_slow_intr(adapter_t *adapter)
|
||||
{
|
||||
u32 cause = readl(adapter->regs + A_PL_CAUSE);
|
||||
|
||||
@@ -238,7 +238,7 @@ static int fpga_slow_intr(adapter_t *ada
|
||||
if (cause)
|
||||
writel(cause, adapter->regs + A_PL_CAUSE);
|
||||
|
||||
- return cause != 0;
|
||||
+ return cause == 0 ? IRQ_NONE : IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -842,13 +842,14 @@ void t1_interrupts_clear(adapter_t* adap
|
||||
/*
|
||||
* Slow path interrupt handler for ASICs.
|
||||
*/
|
||||
-static int asic_slow_intr(adapter_t *adapter)
|
||||
+static irqreturn_t asic_slow_intr(adapter_t *adapter)
|
||||
{
|
||||
u32 cause = readl(adapter->regs + A_PL_CAUSE);
|
||||
+ irqreturn_t ret = IRQ_HANDLED;
|
||||
|
||||
cause &= adapter->slow_intr_mask;
|
||||
if (!cause)
|
||||
- return 0;
|
||||
+ return IRQ_NONE;
|
||||
if (cause & F_PL_INTR_SGE_ERR)
|
||||
t1_sge_intr_error_handler(adapter->sge);
|
||||
if (cause & F_PL_INTR_TP)
|
||||
@@ -857,16 +858,25 @@ static int asic_slow_intr(adapter_t *ada
|
||||
t1_espi_intr_handler(adapter->espi);
|
||||
if (cause & F_PL_INTR_PCIX)
|
||||
t1_pci_intr_handler(adapter);
|
||||
- if (cause & F_PL_INTR_EXT)
|
||||
- t1_elmer0_ext_intr(adapter);
|
||||
+ if (cause & F_PL_INTR_EXT) {
|
||||
+ /* Wake the threaded interrupt to handle external interrupts as
|
||||
+ * we require a process context. We disable EXT interrupts in
|
||||
+ * the interim and let the thread reenable them when it's done.
|
||||
+ */
|
||||
+ adapter->pending_thread_intr |= F_PL_INTR_EXT;
|
||||
+ adapter->slow_intr_mask &= ~F_PL_INTR_EXT;
|
||||
+ writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
|
||||
+ adapter->regs + A_PL_ENABLE);
|
||||
+ ret = IRQ_WAKE_THREAD;
|
||||
+ }
|
||||
|
||||
/* Clear the interrupts just processed. */
|
||||
writel(cause, adapter->regs + A_PL_CAUSE);
|
||||
readl(adapter->regs + A_PL_CAUSE); /* flush writes */
|
||||
- return 1;
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
-int t1_slow_intr_handler(adapter_t *adapter)
|
||||
+irqreturn_t t1_slow_intr_handler(adapter_t *adapter)
|
||||
{
|
||||
#ifdef CONFIG_CHELSIO_T1_1G
|
||||
if (!t1_is_asic(adapter))
|
||||
@@ -0,0 +1,200 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 2 Feb 2021 18:01:04 +0100
|
||||
Subject: [PATCH 2/2] chelsio: cxgb: Disable the card on error in threaded
|
||||
interrupt
|
||||
|
||||
t1_fatal_err() is invoked from the interrupt handler. The bad part is
|
||||
that it invokes (via t1_sge_stop()) del_timer_sync() and tasklet_kill().
|
||||
Both functions must not be called from an interrupt because it is
|
||||
possible that it will wait for the completion of the timer/tasklet it
|
||||
just interrupted.
|
||||
|
||||
In case of a fatal error, use t1_interrupts_disable() to disable all
|
||||
interrupt sources and then wake the interrupt thread with
|
||||
F_PL_INTR_SGE_ERR as pending flag. The threaded-interrupt will stop the
|
||||
card via t1_sge_stop() and not re-enable the interrupts again.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/net/ethernet/chelsio/cxgb/common.h | 1
|
||||
drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 10 -------
|
||||
drivers/net/ethernet/chelsio/cxgb/sge.c | 20 ++++++++++++---
|
||||
drivers/net/ethernet/chelsio/cxgb/sge.h | 2 -
|
||||
drivers/net/ethernet/chelsio/cxgb/subr.c | 38 ++++++++++++++++++++---------
|
||||
5 files changed, 44 insertions(+), 27 deletions(-)
|
||||
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
|
||||
@@ -346,7 +346,6 @@ int t1_get_board_rev(adapter_t *adapter,
|
||||
int t1_init_hw_modules(adapter_t *adapter);
|
||||
int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi);
|
||||
void t1_free_sw_modules(adapter_t *adapter);
|
||||
-void t1_fatal_err(adapter_t *adapter);
|
||||
void t1_link_changed(adapter_t *adapter, int port_id);
|
||||
void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat,
|
||||
int speed, int duplex, int pause);
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
|
||||
@@ -917,16 +917,6 @@ static void mac_stats_task(struct work_s
|
||||
spin_unlock(&adapter->work_lock);
|
||||
}
|
||||
|
||||
-void t1_fatal_err(struct adapter *adapter)
|
||||
-{
|
||||
- if (adapter->flags & FULL_INIT_DONE) {
|
||||
- t1_sge_stop(adapter->sge);
|
||||
- t1_interrupts_disable(adapter);
|
||||
- }
|
||||
- pr_alert("%s: encountered fatal error, operation suspended\n",
|
||||
- adapter->name);
|
||||
-}
|
||||
-
|
||||
static const struct net_device_ops cxgb_netdev_ops = {
|
||||
.ndo_open = cxgb_open,
|
||||
.ndo_stop = cxgb_close,
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
|
||||
@@ -940,10 +940,11 @@ void t1_sge_intr_clear(struct sge *sge)
|
||||
/*
|
||||
* SGE 'Error' interrupt handler
|
||||
*/
|
||||
-int t1_sge_intr_error_handler(struct sge *sge)
|
||||
+bool t1_sge_intr_error_handler(struct sge *sge)
|
||||
{
|
||||
struct adapter *adapter = sge->adapter;
|
||||
u32 cause = readl(adapter->regs + A_SG_INT_CAUSE);
|
||||
+ bool wake = false;
|
||||
|
||||
if (adapter->port[0].dev->hw_features & NETIF_F_TSO)
|
||||
cause &= ~F_PACKET_TOO_BIG;
|
||||
@@ -967,11 +968,14 @@ int t1_sge_intr_error_handler(struct sge
|
||||
sge->stats.pkt_mismatch++;
|
||||
pr_alert("%s: SGE packet mismatch\n", adapter->name);
|
||||
}
|
||||
- if (cause & SGE_INT_FATAL)
|
||||
- t1_fatal_err(adapter);
|
||||
+ if (cause & SGE_INT_FATAL) {
|
||||
+ t1_interrupts_disable(adapter);
|
||||
+ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR;
|
||||
+ wake = true;
|
||||
+ }
|
||||
|
||||
writel(cause, adapter->regs + A_SG_INT_CAUSE);
|
||||
- return 0;
|
||||
+ return wake;
|
||||
}
|
||||
|
||||
const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge)
|
||||
@@ -1635,6 +1639,14 @@ irqreturn_t t1_interrupt_thread(int irq,
|
||||
if (pending_thread_intr & F_PL_INTR_EXT)
|
||||
t1_elmer0_ext_intr_handler(adapter);
|
||||
|
||||
+ /* This error is fatal, interrupts remain off */
|
||||
+ if (pending_thread_intr & F_PL_INTR_SGE_ERR) {
|
||||
+ pr_alert("%s: encountered fatal error, operation suspended\n",
|
||||
+ adapter->name);
|
||||
+ t1_sge_stop(adapter->sge);
|
||||
+ return IRQ_HANDLED;
|
||||
+ }
|
||||
+
|
||||
spin_lock_irq(&adapter->async_lock);
|
||||
adapter->slow_intr_mask |= F_PL_INTR_EXT;
|
||||
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
|
||||
@@ -82,7 +82,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff
|
||||
void t1_vlan_mode(struct adapter *adapter, netdev_features_t features);
|
||||
void t1_sge_start(struct sge *);
|
||||
void t1_sge_stop(struct sge *);
|
||||
-int t1_sge_intr_error_handler(struct sge *);
|
||||
+bool t1_sge_intr_error_handler(struct sge *sge);
|
||||
void t1_sge_intr_enable(struct sge *);
|
||||
void t1_sge_intr_disable(struct sge *);
|
||||
void t1_sge_intr_clear(struct sge *);
|
||||
--- a/drivers/net/ethernet/chelsio/cxgb/subr.c
|
||||
+++ b/drivers/net/ethernet/chelsio/cxgb/subr.c
|
||||
@@ -170,7 +170,7 @@ void t1_link_changed(adapter_t *adapter,
|
||||
t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc);
|
||||
}
|
||||
|
||||
-static int t1_pci_intr_handler(adapter_t *adapter)
|
||||
+static bool t1_pci_intr_handler(adapter_t *adapter)
|
||||
{
|
||||
u32 pcix_cause;
|
||||
|
||||
@@ -179,9 +179,13 @@ static int t1_pci_intr_handler(adapter_t
|
||||
if (pcix_cause) {
|
||||
pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE,
|
||||
pcix_cause);
|
||||
- t1_fatal_err(adapter); /* PCI errors are fatal */
|
||||
+ /* PCI errors are fatal */
|
||||
+ t1_interrupts_disable(adapter);
|
||||
+ adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR;
|
||||
+ pr_alert("%s: PCI error encountered.\n", adapter->name);
|
||||
+ return true;
|
||||
}
|
||||
- return 0;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CHELSIO_T1_1G
|
||||
@@ -213,10 +217,13 @@ static int fpga_phy_intr_handler(adapter
|
||||
static irqreturn_t fpga_slow_intr(adapter_t *adapter)
|
||||
{
|
||||
u32 cause = readl(adapter->regs + A_PL_CAUSE);
|
||||
+ irqreturn_t ret = IRQ_NONE;
|
||||
|
||||
cause &= ~F_PL_INTR_SGE_DATA;
|
||||
- if (cause & F_PL_INTR_SGE_ERR)
|
||||
- t1_sge_intr_error_handler(adapter->sge);
|
||||
+ if (cause & F_PL_INTR_SGE_ERR) {
|
||||
+ if (t1_sge_intr_error_handler(adapter->sge))
|
||||
+ ret = IRQ_WAKE_THREAD;
|
||||
+ }
|
||||
|
||||
if (cause & FPGA_PCIX_INTERRUPT_GMAC)
|
||||
fpga_phy_intr_handler(adapter);
|
||||
@@ -231,13 +238,18 @@ static irqreturn_t fpga_slow_intr(adapte
|
||||
/* Clear TP interrupt */
|
||||
writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE);
|
||||
}
|
||||
- if (cause & FPGA_PCIX_INTERRUPT_PCIX)
|
||||
- t1_pci_intr_handler(adapter);
|
||||
+ if (cause & FPGA_PCIX_INTERRUPT_PCIX) {
|
||||
+ if (t1_pci_intr_handler(adapter))
|
||||
+ ret = IRQ_WAKE_THREAD;
|
||||
+ }
|
||||
|
||||
/* Clear the interrupts just processed. */
|
||||
if (cause)
|
||||
writel(cause, adapter->regs + A_PL_CAUSE);
|
||||
|
||||
+ if (ret != IRQ_NONE)
|
||||
+ return ret;
|
||||
+
|
||||
return cause == 0 ? IRQ_NONE : IRQ_HANDLED;
|
||||
}
|
||||
#endif
|
||||
@@ -850,14 +862,18 @@ static irqreturn_t asic_slow_intr(adapte
|
||||
cause &= adapter->slow_intr_mask;
|
||||
if (!cause)
|
||||
return IRQ_NONE;
|
||||
- if (cause & F_PL_INTR_SGE_ERR)
|
||||
- t1_sge_intr_error_handler(adapter->sge);
|
||||
+ if (cause & F_PL_INTR_SGE_ERR) {
|
||||
+ if (t1_sge_intr_error_handler(adapter->sge))
|
||||
+ ret = IRQ_WAKE_THREAD;
|
||||
+ }
|
||||
if (cause & F_PL_INTR_TP)
|
||||
t1_tp_intr_handler(adapter->tp);
|
||||
if (cause & F_PL_INTR_ESPI)
|
||||
t1_espi_intr_handler(adapter->espi);
|
||||
- if (cause & F_PL_INTR_PCIX)
|
||||
- t1_pci_intr_handler(adapter);
|
||||
+ if (cause & F_PL_INTR_PCIX) {
|
||||
+ if (t1_pci_intr_handler(adapter))
|
||||
+ ret = IRQ_WAKE_THREAD;
|
||||
+ }
|
||||
if (cause & F_PL_INTR_EXT) {
|
||||
/* Wake the threaded interrupt to handle external interrupts as
|
||||
* we require a process context. We disable EXT interrupts in
|
||||
@@ -0,0 +1,86 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 29 Sep 2020 15:21:17 +0200
|
||||
Subject: [PATCH 01/22] locking/rtmutex: Remove cruft
|
||||
|
||||
Most of this has been around since the very beginning. I'm not sure if this
|
||||
was used while the rtmutex-deadlock-tester was around but today it seems
|
||||
to only waste memory:
|
||||
- save_state: No users
|
||||
- name: Assigned and printed if a deadlock was detected. I'm keeping it
|
||||
but want to point out that lockdep has the same information.
|
||||
- file + line: Printed if ::name was NULL. This is only used for
|
||||
in-kernel locks so ::name shouldn't be NULL and then ::file and
|
||||
::line aren't used.
|
||||
- magic: Assigned to NULL by rt_mutex_destroy().
|
||||
|
||||
Remove members of rt_mutex which are not used.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rtmutex.h | 7 ++-----
|
||||
kernel/locking/rtmutex-debug.c | 7 +------
|
||||
kernel/locking/rtmutex.c | 3 ---
|
||||
kernel/locking/rtmutex_common.h | 1 -
|
||||
4 files changed, 3 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -32,10 +32,7 @@ struct rt_mutex {
|
||||
struct rb_root_cached waiters;
|
||||
struct task_struct *owner;
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
- int save_state;
|
||||
- const char *name, *file;
|
||||
- int line;
|
||||
- void *magic;
|
||||
+ const char *name;
|
||||
#endif
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
@@ -60,7 +57,7 @@ struct hrtimer_sleeper;
|
||||
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
|
||||
- , .name = #mutexname, .file = __FILE__, .line = __LINE__
|
||||
+ , .name = #mutexname
|
||||
|
||||
# define rt_mutex_init(mutex) \
|
||||
do { \
|
||||
--- a/kernel/locking/rtmutex-debug.c
|
||||
+++ b/kernel/locking/rtmutex-debug.c
|
||||
@@ -42,12 +42,7 @@ static void printk_task(struct task_stru
|
||||
|
||||
static void printk_lock(struct rt_mutex *lock, int print_owner)
|
||||
{
|
||||
- if (lock->name)
|
||||
- printk(" [%p] {%s}\n",
|
||||
- lock, lock->name);
|
||||
- else
|
||||
- printk(" [%p] {%s:%d}\n",
|
||||
- lock, lock->file, lock->line);
|
||||
+ printk(" [%p] {%s}\n", lock, lock->name);
|
||||
|
||||
if (print_owner && rt_mutex_owner(lock)) {
|
||||
printk(".. ->owner: %p\n", lock->owner);
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1655,9 +1655,6 @@ void __sched rt_mutex_futex_unlock(struc
|
||||
void rt_mutex_destroy(struct rt_mutex *lock)
|
||||
{
|
||||
WARN_ON(rt_mutex_is_locked(lock));
|
||||
-#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
- lock->magic = NULL;
|
||||
-#endif
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
|
||||
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -30,7 +30,6 @@ struct rt_mutex_waiter {
|
||||
struct task_struct *task;
|
||||
struct rt_mutex *lock;
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
- unsigned long ip;
|
||||
struct pid *deadlock_task_pid;
|
||||
struct rt_mutex *deadlock_lock;
|
||||
#endif
|
||||
@@ -0,0 +1,294 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 29 Sep 2020 16:05:11 +0200
|
||||
Subject: [PATCH 02/22] locking/rtmutex: Remove output from deadlock detector.
|
||||
|
||||
In commit
|
||||
f5694788ad8da ("rt_mutex: Add lockdep annotations")
|
||||
|
||||
rtmutex gained lockdep annotation for rt_mutex_lock() and related
|
||||
functions.
|
||||
lockdep will see the locking order and may complain about a deadlock
|
||||
before rtmutex' own mechanism gets a chance to detect it.
|
||||
The rtmutex deadlock detector will only complain about locks with the
|
||||
RT_MUTEX_MIN_CHAINWALK and a waiter must be pending. That means it
|
||||
works only for in-kernel locks because the futex interface always uses
|
||||
RT_MUTEX_FULL_CHAINWALK.
|
||||
The requirement for an active waiter limits the detector to actual
|
||||
deadlocks and makes it impossible to report potential deadlocks like
|
||||
lockdep does.
|
||||
It looks like lockdep is better suited for reporting deadlocks.
|
||||
|
||||
Remove rtmutex' debug print on deadlock detection.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rtmutex.h | 7 --
|
||||
kernel/locking/rtmutex-debug.c | 97 ----------------------------------------
|
||||
kernel/locking/rtmutex-debug.h | 11 ----
|
||||
kernel/locking/rtmutex.c | 9 ---
|
||||
kernel/locking/rtmutex.h | 7 --
|
||||
kernel/locking/rtmutex_common.h | 4 -
|
||||
6 files changed, 135 deletions(-)
|
||||
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -31,9 +31,6 @@ struct rt_mutex {
|
||||
raw_spinlock_t wait_lock;
|
||||
struct rb_root_cached waiters;
|
||||
struct task_struct *owner;
|
||||
-#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
- const char *name;
|
||||
-#endif
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
#endif
|
||||
@@ -56,8 +53,6 @@ struct hrtimer_sleeper;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
|
||||
- , .name = #mutexname
|
||||
|
||||
# define rt_mutex_init(mutex) \
|
||||
do { \
|
||||
@@ -67,7 +62,6 @@ do { \
|
||||
|
||||
extern void rt_mutex_debug_task_free(struct task_struct *tsk);
|
||||
#else
|
||||
-# define __DEBUG_RT_MUTEX_INITIALIZER(mutexname)
|
||||
# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL)
|
||||
# define rt_mutex_debug_task_free(t) do { } while (0)
|
||||
#endif
|
||||
@@ -83,7 +77,6 @@ do { \
|
||||
{ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
|
||||
, .waiters = RB_ROOT_CACHED \
|
||||
, .owner = NULL \
|
||||
- __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
|
||||
__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
|
||||
|
||||
#define DEFINE_RT_MUTEX(mutexname) \
|
||||
--- a/kernel/locking/rtmutex-debug.c
|
||||
+++ b/kernel/locking/rtmutex-debug.c
|
||||
@@ -32,105 +32,12 @@
|
||||
|
||||
#include "rtmutex_common.h"
|
||||
|
||||
-static void printk_task(struct task_struct *p)
|
||||
-{
|
||||
- if (p)
|
||||
- printk("%16s:%5d [%p, %3d]", p->comm, task_pid_nr(p), p, p->prio);
|
||||
- else
|
||||
- printk("<none>");
|
||||
-}
|
||||
-
|
||||
-static void printk_lock(struct rt_mutex *lock, int print_owner)
|
||||
-{
|
||||
- printk(" [%p] {%s}\n", lock, lock->name);
|
||||
-
|
||||
- if (print_owner && rt_mutex_owner(lock)) {
|
||||
- printk(".. ->owner: %p\n", lock->owner);
|
||||
- printk(".. held by: ");
|
||||
- printk_task(rt_mutex_owner(lock));
|
||||
- printk("\n");
|
||||
- }
|
||||
-}
|
||||
-
|
||||
void rt_mutex_debug_task_free(struct task_struct *task)
|
||||
{
|
||||
DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root));
|
||||
DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
|
||||
}
|
||||
|
||||
-/*
|
||||
- * We fill out the fields in the waiter to store the information about
|
||||
- * the deadlock. We print when we return. act_waiter can be NULL in
|
||||
- * case of a remove waiter operation.
|
||||
- */
|
||||
-void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
|
||||
- struct rt_mutex_waiter *act_waiter,
|
||||
- struct rt_mutex *lock)
|
||||
-{
|
||||
- struct task_struct *task;
|
||||
-
|
||||
- if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter)
|
||||
- return;
|
||||
-
|
||||
- task = rt_mutex_owner(act_waiter->lock);
|
||||
- if (task && task != current) {
|
||||
- act_waiter->deadlock_task_pid = get_pid(task_pid(task));
|
||||
- act_waiter->deadlock_lock = lock;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
|
||||
-{
|
||||
- struct task_struct *task;
|
||||
-
|
||||
- if (!waiter->deadlock_lock || !debug_locks)
|
||||
- return;
|
||||
-
|
||||
- rcu_read_lock();
|
||||
- task = pid_task(waiter->deadlock_task_pid, PIDTYPE_PID);
|
||||
- if (!task) {
|
||||
- rcu_read_unlock();
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- if (!debug_locks_off()) {
|
||||
- rcu_read_unlock();
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- pr_warn("\n");
|
||||
- pr_warn("============================================\n");
|
||||
- pr_warn("WARNING: circular locking deadlock detected!\n");
|
||||
- pr_warn("%s\n", print_tainted());
|
||||
- pr_warn("--------------------------------------------\n");
|
||||
- printk("%s/%d is deadlocking current task %s/%d\n\n",
|
||||
- task->comm, task_pid_nr(task),
|
||||
- current->comm, task_pid_nr(current));
|
||||
-
|
||||
- printk("\n1) %s/%d is trying to acquire this lock:\n",
|
||||
- current->comm, task_pid_nr(current));
|
||||
- printk_lock(waiter->lock, 1);
|
||||
-
|
||||
- printk("\n2) %s/%d is blocked on this lock:\n",
|
||||
- task->comm, task_pid_nr(task));
|
||||
- printk_lock(waiter->deadlock_lock, 1);
|
||||
-
|
||||
- debug_show_held_locks(current);
|
||||
- debug_show_held_locks(task);
|
||||
-
|
||||
- printk("\n%s/%d's [blocked] stackdump:\n\n",
|
||||
- task->comm, task_pid_nr(task));
|
||||
- show_stack(task, NULL, KERN_DEFAULT);
|
||||
- printk("\n%s/%d's [current] stackdump:\n\n",
|
||||
- current->comm, task_pid_nr(current));
|
||||
- dump_stack();
|
||||
- debug_show_all_locks();
|
||||
- rcu_read_unlock();
|
||||
-
|
||||
- printk("[ turning off deadlock detection."
|
||||
- "Please report this trace. ]\n\n");
|
||||
-}
|
||||
-
|
||||
void debug_rt_mutex_lock(struct rt_mutex *lock)
|
||||
{
|
||||
}
|
||||
@@ -153,12 +60,10 @@ void debug_rt_mutex_proxy_unlock(struct
|
||||
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
|
||||
{
|
||||
memset(waiter, 0x11, sizeof(*waiter));
|
||||
- waiter->deadlock_task_pid = NULL;
|
||||
}
|
||||
|
||||
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
|
||||
{
|
||||
- put_pid(waiter->deadlock_task_pid);
|
||||
memset(waiter, 0x22, sizeof(*waiter));
|
||||
}
|
||||
|
||||
@@ -168,10 +73,8 @@ void debug_rt_mutex_init(struct rt_mutex
|
||||
* Make sure we are not reinitializing a held lock:
|
||||
*/
|
||||
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
|
||||
- lock->name = name;
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
lockdep_init_map(&lock->dep_map, name, key, 0);
|
||||
#endif
|
||||
}
|
||||
-
|
||||
--- a/kernel/locking/rtmutex-debug.h
|
||||
+++ b/kernel/locking/rtmutex-debug.h
|
||||
@@ -18,20 +18,9 @@ extern void debug_rt_mutex_unlock(struct
|
||||
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
|
||||
struct task_struct *powner);
|
||||
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
|
||||
-extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk,
|
||||
- struct rt_mutex_waiter *waiter,
|
||||
- struct rt_mutex *lock);
|
||||
-extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter);
|
||||
-# define debug_rt_mutex_reset_waiter(w) \
|
||||
- do { (w)->deadlock_lock = NULL; } while (0)
|
||||
|
||||
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
|
||||
enum rtmutex_chainwalk walk)
|
||||
{
|
||||
return (waiter != NULL);
|
||||
}
|
||||
-
|
||||
-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
-{
|
||||
- debug_rt_mutex_print_deadlock(w);
|
||||
-}
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -597,7 +597,6 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
* walk, we detected a deadlock.
|
||||
*/
|
||||
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
|
||||
- debug_rt_mutex_deadlock(chwalk, orig_waiter, lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
ret = -EDEADLK;
|
||||
goto out_unlock_pi;
|
||||
@@ -1189,8 +1188,6 @@ static int __sched
|
||||
|
||||
raw_spin_unlock_irq(&lock->wait_lock);
|
||||
|
||||
- debug_rt_mutex_print_deadlock(waiter);
|
||||
-
|
||||
schedule();
|
||||
|
||||
raw_spin_lock_irq(&lock->wait_lock);
|
||||
@@ -1211,10 +1208,6 @@ static void rt_mutex_handle_deadlock(int
|
||||
if (res != -EDEADLOCK || detect_deadlock)
|
||||
return;
|
||||
|
||||
- /*
|
||||
- * Yell lowdly and stop the task right here.
|
||||
- */
|
||||
- rt_mutex_print_deadlock(w);
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
@@ -1763,8 +1756,6 @@ int __rt_mutex_start_proxy_lock(struct r
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
- debug_rt_mutex_print_deadlock(waiter);
|
||||
-
|
||||
return ret;
|
||||
}
|
||||
|
||||
--- a/kernel/locking/rtmutex.h
|
||||
+++ b/kernel/locking/rtmutex.h
|
||||
@@ -19,15 +19,8 @@
|
||||
#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
|
||||
#define debug_rt_mutex_unlock(l) do { } while (0)
|
||||
#define debug_rt_mutex_init(m, n, k) do { } while (0)
|
||||
-#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
|
||||
-#define debug_rt_mutex_print_deadlock(w) do { } while (0)
|
||||
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
|
||||
|
||||
-static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
-{
|
||||
- WARN(1, "rtmutex deadlock detected\n");
|
||||
-}
|
||||
-
|
||||
static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w,
|
||||
enum rtmutex_chainwalk walk)
|
||||
{
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -29,10 +29,6 @@ struct rt_mutex_waiter {
|
||||
struct rb_node pi_tree_entry;
|
||||
struct task_struct *task;
|
||||
struct rt_mutex *lock;
|
||||
-#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
- struct pid *deadlock_task_pid;
|
||||
- struct rt_mutex *deadlock_lock;
|
||||
-#endif
|
||||
int prio;
|
||||
u64 deadline;
|
||||
};
|
||||
@@ -0,0 +1,53 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 29 Sep 2020 16:32:49 +0200
|
||||
Subject: [PATCH 03/22] locking/rtmutex: Move rt_mutex_init() outside of
|
||||
CONFIG_DEBUG_RT_MUTEXES
|
||||
|
||||
rt_mutex_init() only initializes lockdep if CONFIG_DEBUG_RT_MUTEXES is
|
||||
enabled. The static initializer (DEFINE_RT_MUTEX) does not have such a
|
||||
restriction.
|
||||
|
||||
Move rt_mutex_init() outside of CONFIG_DEBUG_RT_MUTEXES.
|
||||
Move the remaining functions in this CONFIG_DEBUG_RT_MUTEXES block to
|
||||
the upper block.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rtmutex.h | 12 +++---------
|
||||
1 file changed, 3 insertions(+), 9 deletions(-)
|
||||
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -43,6 +43,7 @@ struct hrtimer_sleeper;
|
||||
extern int rt_mutex_debug_check_no_locks_freed(const void *from,
|
||||
unsigned long len);
|
||||
extern void rt_mutex_debug_check_no_locks_held(struct task_struct *task);
|
||||
+ extern void rt_mutex_debug_task_free(struct task_struct *tsk);
|
||||
#else
|
||||
static inline int rt_mutex_debug_check_no_locks_freed(const void *from,
|
||||
unsigned long len)
|
||||
@@ -50,22 +51,15 @@ struct hrtimer_sleeper;
|
||||
return 0;
|
||||
}
|
||||
# define rt_mutex_debug_check_no_locks_held(task) do { } while (0)
|
||||
+# define rt_mutex_debug_task_free(t) do { } while (0)
|
||||
#endif
|
||||
|
||||
-#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
-
|
||||
-# define rt_mutex_init(mutex) \
|
||||
+#define rt_mutex_init(mutex) \
|
||||
do { \
|
||||
static struct lock_class_key __key; \
|
||||
__rt_mutex_init(mutex, __func__, &__key); \
|
||||
} while (0)
|
||||
|
||||
- extern void rt_mutex_debug_task_free(struct task_struct *tsk);
|
||||
-#else
|
||||
-# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL, NULL)
|
||||
-# define rt_mutex_debug_task_free(t) do { } while (0)
|
||||
-#endif
|
||||
-
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) \
|
||||
, .dep_map = { .name = #mutexname }
|
||||
@@ -0,0 +1,89 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 7 Oct 2020 12:11:33 +0200
|
||||
Subject: [PATCH 04/22] locking/rtmutex: Remove rt_mutex_timed_lock()
|
||||
|
||||
rt_mutex_timed_lock() has no callers since commit
|
||||
c051b21f71d1f ("rtmutex: Confine deadlock logic to futex")
|
||||
|
||||
Remove rt_mutex_timed_lock().
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rtmutex.h | 3 ---
|
||||
kernel/locking/rtmutex.c | 46 ----------------------------------------------
|
||||
2 files changed, 49 deletions(-)
|
||||
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -99,9 +99,6 @@ extern void rt_mutex_lock(struct rt_mute
|
||||
#endif
|
||||
|
||||
extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
|
||||
-extern int rt_mutex_timed_lock(struct rt_mutex *lock,
|
||||
- struct hrtimer_sleeper *timeout);
|
||||
-
|
||||
extern int rt_mutex_trylock(struct rt_mutex *lock);
|
||||
|
||||
extern void rt_mutex_unlock(struct rt_mutex *lock);
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1406,21 +1406,6 @@ rt_mutex_fastlock(struct rt_mutex *lock,
|
||||
}
|
||||
|
||||
static inline int
|
||||
-rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
|
||||
- struct hrtimer_sleeper *timeout,
|
||||
- enum rtmutex_chainwalk chwalk,
|
||||
- int (*slowfn)(struct rt_mutex *lock, int state,
|
||||
- struct hrtimer_sleeper *timeout,
|
||||
- enum rtmutex_chainwalk chwalk))
|
||||
-{
|
||||
- if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
|
||||
- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
|
||||
- return 0;
|
||||
-
|
||||
- return slowfn(lock, state, timeout, chwalk);
|
||||
-}
|
||||
-
|
||||
-static inline int
|
||||
rt_mutex_fasttrylock(struct rt_mutex *lock,
|
||||
int (*slowfn)(struct rt_mutex *lock))
|
||||
{
|
||||
@@ -1528,37 +1513,6 @@ int __sched __rt_mutex_futex_trylock(str
|
||||
}
|
||||
|
||||
/**
|
||||
- * rt_mutex_timed_lock - lock a rt_mutex interruptible
|
||||
- * the timeout structure is provided
|
||||
- * by the caller
|
||||
- *
|
||||
- * @lock: the rt_mutex to be locked
|
||||
- * @timeout: timeout structure or NULL (no timeout)
|
||||
- *
|
||||
- * Returns:
|
||||
- * 0 on success
|
||||
- * -EINTR when interrupted by a signal
|
||||
- * -ETIMEDOUT when the timeout expired
|
||||
- */
|
||||
-int
|
||||
-rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
|
||||
-{
|
||||
- int ret;
|
||||
-
|
||||
- might_sleep();
|
||||
-
|
||||
- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
- ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
|
||||
- RT_MUTEX_MIN_CHAINWALK,
|
||||
- rt_mutex_slowlock);
|
||||
- if (ret)
|
||||
- mutex_release(&lock->dep_map, _RET_IP_);
|
||||
-
|
||||
- return ret;
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
|
||||
-
|
||||
-/**
|
||||
* rt_mutex_trylock - try to lock a rt_mutex
|
||||
*
|
||||
* @lock: the rt_mutex to be locked
|
||||
@@ -1,6 +1,7 @@
|
||||
Subject: rtmutex: Handle the various new futex race conditions
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 10 Jun 2011 11:04:15 +0200
|
||||
Subject: [PATCH 05/22] locking/rtmutex: Handle the various new futex race
|
||||
conditions
|
||||
|
||||
RT opens a few new interesting race conditions in the rtmutex/futex
|
||||
combo due to futex hash bucket lock being a 'sleeping' spinlock and
|
||||
@@ -8,16 +9,16 @@ therefor not disabling preemption.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/futex.c | 77 ++++++++++++++++++++++++++++++++--------
|
||||
kernel/futex.c | 78 ++++++++++++++++++++++++++++++++--------
|
||||
kernel/locking/rtmutex.c | 36 +++++++++++++++---
|
||||
kernel/locking/rtmutex_common.h | 2 +
|
||||
3 files changed, 94 insertions(+), 21 deletions(-)
|
||||
3 files changed, 95 insertions(+), 21 deletions(-)
|
||||
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
@@ -2260,6 +2260,16 @@ static int futex_requeue(u32 __user *uad
|
||||
@@ -2154,6 +2154,16 @@ static int futex_requeue(u32 __user *uad
|
||||
*/
|
||||
requeue_pi_wake_futex(this, &key2, hb2);
|
||||
drop_count++;
|
||||
continue;
|
||||
+ } else if (ret == -EAGAIN) {
|
||||
+ /*
|
||||
@@ -32,16 +33,16 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
} else if (ret) {
|
||||
/*
|
||||
* rt_mutex_start_proxy_lock() detected a
|
||||
@@ -3315,7 +3325,7 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
@@ -3172,7 +3182,7 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct futex_pi_state *pi_state = NULL;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
- struct futex_hash_bucket *hb;
|
||||
+ struct futex_hash_bucket *hb, *hb2;
|
||||
union futex_key key2 = FUTEX_KEY_INIT;
|
||||
struct futex_q q = futex_q_init;
|
||||
int res, ret;
|
||||
@@ -3367,20 +3377,55 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
@@ -3224,20 +3234,55 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
|
||||
futex_wait_queue_me(hb, &q, to);
|
||||
|
||||
@@ -49,7 +50,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
- ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
|
||||
- spin_unlock(&hb->lock);
|
||||
- if (ret)
|
||||
- goto out_put_keys;
|
||||
- goto out;
|
||||
+ /*
|
||||
+ * On RT we must avoid races with requeue and trying to block
|
||||
+ * on two mutexes (hb->lock and uaddr2's rtmutex) by
|
||||
@@ -86,7 +87,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
|
||||
+ spin_unlock(&hb->lock);
|
||||
+ if (ret)
|
||||
+ goto out_put_keys;
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
/*
|
||||
@@ -108,7 +109,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
/* Check if the requeue code acquired the second futex for us. */
|
||||
if (!q.rt_waiter) {
|
||||
@@ -3389,7 +3434,8 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
@@ -3246,14 +3291,16 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
* did a lock-steal - fix up the PI-state in that case.
|
||||
*/
|
||||
if (q.pi_state && (q.pi_state->owner != current)) {
|
||||
@@ -116,18 +117,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+ spin_lock(&hb2->lock);
|
||||
+ BUG_ON(&hb2->lock != q.lock_ptr);
|
||||
ret = fixup_pi_state_owner(uaddr2, &q, current);
|
||||
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
|
||||
pi_state = q.pi_state;
|
||||
@@ -3400,7 +3446,7 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
/*
|
||||
* Drop the reference to the pi state which
|
||||
* the requeue_pi() code acquired for us.
|
||||
*/
|
||||
put_pi_state(q.pi_state);
|
||||
- spin_unlock(q.lock_ptr);
|
||||
+ spin_unlock(&hb2->lock);
|
||||
}
|
||||
} else {
|
||||
struct rt_mutex *pi_mutex;
|
||||
@@ -3414,7 +3460,8 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
+
|
||||
/*
|
||||
* Adjust the return value. It's either -EFAULT or
|
||||
* success (1) but the caller expects 0 for success.
|
||||
@@ -3272,7 +3319,8 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
pi_mutex = &q.pi_state->pi_mutex;
|
||||
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
|
||||
|
||||
@@ -151,7 +152,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
/*
|
||||
* We can speed up the acquire/release, if there's no debugging state to be
|
||||
* set up.
|
||||
@@ -380,7 +385,8 @@ int max_lock_depth = 1024;
|
||||
@@ -378,7 +383,8 @@ int max_lock_depth = 1024;
|
||||
|
||||
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
|
||||
{
|
||||
@@ -161,7 +162,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -516,7 +522,7 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
@@ -514,7 +520,7 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
* reached or the state of the chain has changed while we
|
||||
* dropped the locks.
|
||||
*/
|
||||
@@ -170,7 +171,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
goto out_unlock_pi;
|
||||
|
||||
/*
|
||||
@@ -950,6 +956,22 @@ static int task_blocks_on_rt_mutex(struc
|
||||
@@ -947,6 +953,22 @@ static int task_blocks_on_rt_mutex(struc
|
||||
return -EDEADLK;
|
||||
|
||||
raw_spin_lock(&task->pi_lock);
|
||||
@@ -193,7 +194,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
waiter->task = task;
|
||||
waiter->lock = lock;
|
||||
waiter->prio = task->prio;
|
||||
@@ -973,7 +995,7 @@ static int task_blocks_on_rt_mutex(struc
|
||||
@@ -970,7 +992,7 @@ static int task_blocks_on_rt_mutex(struc
|
||||
rt_mutex_enqueue_pi(owner, waiter);
|
||||
|
||||
rt_mutex_adjust_prio(owner);
|
||||
@@ -202,7 +203,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
chain_walk = 1;
|
||||
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
|
||||
chain_walk = 1;
|
||||
@@ -1069,7 +1091,7 @@ static void remove_waiter(struct rt_mute
|
||||
@@ -1066,7 +1088,7 @@ static void remove_waiter(struct rt_mute
|
||||
{
|
||||
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
@@ -211,7 +212,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
lockdep_assert_held(&lock->wait_lock);
|
||||
|
||||
@@ -1095,7 +1117,8 @@ static void remove_waiter(struct rt_mute
|
||||
@@ -1092,7 +1114,8 @@ static void remove_waiter(struct rt_mute
|
||||
rt_mutex_adjust_prio(owner);
|
||||
|
||||
/* Store the lock on which owner is blocked or NULL */
|
||||
@@ -221,7 +222,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
raw_spin_unlock(&owner->pi_lock);
|
||||
|
||||
@@ -1131,7 +1154,8 @@ void rt_mutex_adjust_pi(struct task_stru
|
||||
@@ -1128,7 +1151,8 @@ void rt_mutex_adjust_pi(struct task_stru
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
|
||||
waiter = task->pi_blocked_on;
|
||||
@@ -233,7 +234,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
}
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -130,6 +130,8 @@ enum rtmutex_chainwalk {
|
||||
@@ -125,6 +125,8 @@ enum rtmutex_chainwalk {
|
||||
/*
|
||||
* PI-futex support (proxy locking functions, etc.):
|
||||
*/
|
||||
@@ -1,12 +1,11 @@
|
||||
From: Steven Rostedt <rostedt@goodmis.org>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: futex: Fix bug on when a requeued RT task times out
|
||||
Subject: [PATCH 06/22] futex: Fix bug on when a requeued RT task times out
|
||||
|
||||
Requeue with timeout causes a bug with PREEMPT_RT.
|
||||
|
||||
The bug comes from a timed out condition.
|
||||
|
||||
|
||||
TASK 1 TASK 2
|
||||
------ ------
|
||||
futex_wait_requeue_pi()
|
||||
@@ -16,13 +15,12 @@ The bug comes from a timed out condition.
|
||||
double_lock_hb();
|
||||
|
||||
raw_spin_lock(pi_lock);
|
||||
if (current->pi_blocked_on) {
|
||||
if (current->pi_blocked_on) {
|
||||
} else {
|
||||
current->pi_blocked_on = PI_WAKEUP_INPROGRESS;
|
||||
raw_spin_unlock(pi_lock);
|
||||
spin_lock(hb->lock); <-- blocked!
|
||||
|
||||
|
||||
plist_for_each_entry_safe(this) {
|
||||
rt_mutex_start_proxy_lock();
|
||||
task_blocks_on_rt_mutex();
|
||||
@@ -45,7 +43,6 @@ Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
|
||||
the proxy task that it is being requeued, and will handle things
|
||||
appropriately.
|
||||
|
||||
|
||||
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
@@ -65,7 +62,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1779,6 +1780,34 @@ int __rt_mutex_start_proxy_lock(struct r
|
||||
@@ -1720,6 +1721,34 @@ int __rt_mutex_start_proxy_lock(struct r
|
||||
if (try_to_take_rt_mutex(lock, task, NULL))
|
||||
return 1;
|
||||
|
||||
@@ -102,7 +99,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
RT_MUTEX_FULL_CHAINWALK);
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -131,6 +131,7 @@ enum rtmutex_chainwalk {
|
||||
@@ -126,6 +126,7 @@ enum rtmutex_chainwalk {
|
||||
* PI-futex support (proxy locking functions, etc.):
|
||||
*/
|
||||
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
|
||||
@@ -1,13 +1,12 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sat, 1 Apr 2017 12:50:59 +0200
|
||||
Subject: [PATCH] rtmutex: Make lock_killable work
|
||||
Subject: [PATCH 07/22] locking/rtmutex: Make lock_killable work
|
||||
|
||||
Locking an rt mutex killable does not work because signal handling is
|
||||
restricted to TASK_INTERRUPTIBLE.
|
||||
|
||||
Use signal_pending_state() unconditionaly.
|
||||
Use signal_pending_state() unconditionally.
|
||||
|
||||
Cc: stable-rt@vger.kernel.org
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
@@ -16,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1200,18 +1200,13 @@ static int __sched
|
||||
@@ -1197,18 +1197,13 @@ static int __sched
|
||||
if (try_to_take_rt_mutex(lock, current, waiter))
|
||||
break;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
Subject: spinlock: Split the lock types header
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 29 Jun 2011 19:34:01 +0200
|
||||
Subject: [PATCH 08/22] locking/spinlock: Split the lock types header
|
||||
|
||||
Split raw_spinlock into its own file and the remaining spinlock_t into
|
||||
its own non-RT header. The non-RT header will be replaced later by sleeping
|
||||
@@ -8,11 +8,13 @@ spinlocks.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/rwlock_types.h | 4 ++
|
||||
include/linux/spinlock_types.h | 71 +-----------------------------------
|
||||
include/linux/spinlock_types_nort.h | 33 ++++++++++++++++
|
||||
include/linux/spinlock_types_raw.h | 55 +++++++++++++++++++++++++++
|
||||
4 files changed, 94 insertions(+), 69 deletions(-)
|
||||
include/linux/rwlock_types.h | 4 +
|
||||
include/linux/spinlock_types.h | 87 ------------------------------------
|
||||
include/linux/spinlock_types_nort.h | 39 ++++++++++++++++
|
||||
include/linux/spinlock_types_raw.h | 65 ++++++++++++++++++++++++++
|
||||
4 files changed, 110 insertions(+), 85 deletions(-)
|
||||
create mode 100644 include/linux/spinlock_types_nort.h
|
||||
create mode 100644 include/linux/spinlock_types_raw.h
|
||||
|
||||
--- a/include/linux/rwlock_types.h
|
||||
+++ b/include/linux/rwlock_types.h
|
||||
@@ -29,7 +31,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
* and initializers
|
||||
--- a/include/linux/spinlock_types.h
|
||||
+++ b/include/linux/spinlock_types.h
|
||||
@@ -9,76 +9,9 @@
|
||||
@@ -9,92 +9,9 @@
|
||||
* Released under the General Public License (GPL).
|
||||
*/
|
||||
|
||||
@@ -40,7 +42,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
-#endif
|
||||
+#include <linux/spinlock_types_raw.h>
|
||||
|
||||
-#include <linux/lockdep.h>
|
||||
-#include <linux/lockdep_types.h>
|
||||
-
|
||||
-typedef struct raw_spinlock {
|
||||
- arch_spinlock_t raw_lock;
|
||||
@@ -58,8 +60,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
-#define SPINLOCK_OWNER_INIT ((void *)-1L)
|
||||
-
|
||||
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
-# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
|
||||
-# define RAW_SPIN_DEP_MAP_INIT(lockname) \
|
||||
- .dep_map = { \
|
||||
- .name = #lockname, \
|
||||
- .wait_type_inner = LD_WAIT_SPIN, \
|
||||
- }
|
||||
-# define SPIN_DEP_MAP_INIT(lockname) \
|
||||
- .dep_map = { \
|
||||
- .name = #lockname, \
|
||||
- .wait_type_inner = LD_WAIT_CONFIG, \
|
||||
- }
|
||||
-#else
|
||||
-# define RAW_SPIN_DEP_MAP_INIT(lockname)
|
||||
-# define SPIN_DEP_MAP_INIT(lockname)
|
||||
-#endif
|
||||
-
|
||||
@@ -76,7 +88,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
- { \
|
||||
- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
|
||||
- SPIN_DEBUG_INIT(lockname) \
|
||||
- SPIN_DEP_MAP_INIT(lockname) }
|
||||
- RAW_SPIN_DEP_MAP_INIT(lockname) }
|
||||
-
|
||||
-#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
|
||||
- (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
|
||||
@@ -97,11 +109,17 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
- };
|
||||
-} spinlock_t;
|
||||
-
|
||||
-#define ___SPIN_LOCK_INITIALIZER(lockname) \
|
||||
- { \
|
||||
- .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
|
||||
- SPIN_DEBUG_INIT(lockname) \
|
||||
- SPIN_DEP_MAP_INIT(lockname) }
|
||||
-
|
||||
-#define __SPIN_LOCK_INITIALIZER(lockname) \
|
||||
- { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
|
||||
- { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } }
|
||||
-
|
||||
-#define __SPIN_LOCK_UNLOCKED(lockname) \
|
||||
- (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
|
||||
- (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname)
|
||||
-
|
||||
-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
|
||||
+#include <linux/spinlock_types_nort.h>
|
||||
@@ -110,7 +128,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/spinlock_types_nort.h
|
||||
@@ -0,0 +1,33 @@
|
||||
@@ -0,0 +1,39 @@
|
||||
+#ifndef __LINUX_SPINLOCK_TYPES_NORT_H
|
||||
+#define __LINUX_SPINLOCK_TYPES_NORT_H
|
||||
+
|
||||
@@ -135,18 +153,24 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+ };
|
||||
+} spinlock_t;
|
||||
+
|
||||
+#define ___SPIN_LOCK_INITIALIZER(lockname) \
|
||||
+{ \
|
||||
+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
|
||||
+ SPIN_DEBUG_INIT(lockname) \
|
||||
+ SPIN_DEP_MAP_INIT(lockname) }
|
||||
+
|
||||
+#define __SPIN_LOCK_INITIALIZER(lockname) \
|
||||
+ { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } }
|
||||
+ { { .rlock = ___SPIN_LOCK_INITIALIZER(lockname) } }
|
||||
+
|
||||
+#define __SPIN_LOCK_UNLOCKED(lockname) \
|
||||
+ (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
|
||||
+ (spinlock_t) __SPIN_LOCK_INITIALIZER(lockname)
|
||||
+
|
||||
+#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
|
||||
+
|
||||
+#endif
|
||||
--- /dev/null
|
||||
+++ b/include/linux/spinlock_types_raw.h
|
||||
@@ -0,0 +1,55 @@
|
||||
@@ -0,0 +1,65 @@
|
||||
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
|
||||
+#define __LINUX_SPINLOCK_TYPES_RAW_H
|
||||
+
|
||||
@@ -158,7 +182,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+# include <linux/spinlock_types_up.h>
|
||||
+#endif
|
||||
+
|
||||
+#include <linux/lockdep.h>
|
||||
+#include <linux/lockdep_types.h>
|
||||
+
|
||||
+typedef struct raw_spinlock {
|
||||
+ arch_spinlock_t raw_lock;
|
||||
@@ -176,8 +200,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+#define SPINLOCK_OWNER_INIT ((void *)-1L)
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname }
|
||||
+# define RAW_SPIN_DEP_MAP_INIT(lockname) \
|
||||
+ .dep_map = { \
|
||||
+ .name = #lockname, \
|
||||
+ .wait_type_inner = LD_WAIT_SPIN, \
|
||||
+ }
|
||||
+# define SPIN_DEP_MAP_INIT(lockname) \
|
||||
+ .dep_map = { \
|
||||
+ .name = #lockname, \
|
||||
+ .wait_type_inner = LD_WAIT_CONFIG, \
|
||||
+ }
|
||||
+#else
|
||||
+# define RAW_SPIN_DEP_MAP_INIT(lockname)
|
||||
+# define SPIN_DEP_MAP_INIT(lockname)
|
||||
+#endif
|
||||
+
|
||||
@@ -191,14 +225,14 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+#endif
|
||||
+
|
||||
+#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
|
||||
+ { \
|
||||
+{ \
|
||||
+ .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
|
||||
+ SPIN_DEBUG_INIT(lockname) \
|
||||
+ SPIN_DEP_MAP_INIT(lockname) }
|
||||
+ RAW_SPIN_DEP_MAP_INIT(lockname) }
|
||||
+
|
||||
+#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
|
||||
+ (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
|
||||
+
|
||||
+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
|
||||
+#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x)
|
||||
+
|
||||
+#endif
|
||||
@@ -1,6 +1,6 @@
|
||||
Subject: rtmutex: Avoid include hell
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 29 Jun 2011 20:06:39 +0200
|
||||
Subject: [PATCH 09/22] locking/rtmutex: Avoid include hell
|
||||
|
||||
Include only the required raw types. This avoids pulling in the
|
||||
complete spinlock header which in turn requires rtmutex.h at some point.
|
||||
@@ -0,0 +1,26 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 14 Aug 2020 16:55:25 +0200
|
||||
Subject: [PATCH 11/23] lockdep: Reduce header files in debug_locks.h
|
||||
|
||||
The inclusion of printk.h leads to a circular dependency if spinlock_t is
|
||||
based on rt_mutex.
|
||||
|
||||
Include only atomic.h (xchg()) and cache.h (__read_mostly).
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/debug_locks.h | 3 +--
|
||||
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/debug_locks.h
|
||||
+++ b/include/linux/debug_locks.h
|
||||
@@ -3,8 +3,7 @@
|
||||
#define __LINUX_DEBUG_LOCKING_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
-#include <linux/bug.h>
|
||||
-#include <linux/printk.h>
|
||||
+#include <linux/cache.h>
|
||||
|
||||
struct task_struct;
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 14 Aug 2020 17:08:41 +0200
|
||||
Subject: [PATCH 11/22] locking: split out the rbtree definition
|
||||
|
||||
rtmutex.h needs the definition for rb_root_cached. By including kernel.h
|
||||
we will get to spinlock.h which requires rtmutex.h again.
|
||||
|
||||
Split out the required struct definition and move it into its own header
|
||||
file which can be included by rtmutex.h
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rbtree.h | 27 +--------------------------
|
||||
include/linux/rbtree_type.h | 31 +++++++++++++++++++++++++++++++
|
||||
include/linux/rtmutex.h | 2 +-
|
||||
3 files changed, 33 insertions(+), 27 deletions(-)
|
||||
create mode 100644 include/linux/rbtree_type.h
|
||||
|
||||
--- a/include/linux/rbtree.h
|
||||
+++ b/include/linux/rbtree.h
|
||||
@@ -19,19 +19,9 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/stddef.h>
|
||||
+#include <linux/rbtree_type.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
-struct rb_node {
|
||||
- unsigned long __rb_parent_color;
|
||||
- struct rb_node *rb_right;
|
||||
- struct rb_node *rb_left;
|
||||
-} __attribute__((aligned(sizeof(long))));
|
||||
- /* The alignment might seem pointless, but allegedly CRIS needs it */
|
||||
-
|
||||
-struct rb_root {
|
||||
- struct rb_node *rb_node;
|
||||
-};
|
||||
-
|
||||
#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
|
||||
|
||||
#define RB_ROOT (struct rb_root) { NULL, }
|
||||
@@ -112,21 +102,6 @@ static inline void rb_link_node_rcu(stru
|
||||
typeof(*pos), field); 1; }); \
|
||||
pos = n)
|
||||
|
||||
-/*
|
||||
- * Leftmost-cached rbtrees.
|
||||
- *
|
||||
- * We do not cache the rightmost node based on footprint
|
||||
- * size vs number of potential users that could benefit
|
||||
- * from O(1) rb_last(). Just not worth it, users that want
|
||||
- * this feature can always implement the logic explicitly.
|
||||
- * Furthermore, users that want to cache both pointers may
|
||||
- * find it a bit asymmetric, but that's ok.
|
||||
- */
|
||||
-struct rb_root_cached {
|
||||
- struct rb_root rb_root;
|
||||
- struct rb_node *rb_leftmost;
|
||||
-};
|
||||
-
|
||||
#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL }
|
||||
|
||||
/* Same as rb_first(), but O(1) */
|
||||
--- /dev/null
|
||||
+++ b/include/linux/rbtree_type.h
|
||||
@@ -0,0 +1,31 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
+#ifndef _LINUX_RBTREE_TYPE_H
|
||||
+#define _LINUX_RBTREE_TYPE_H
|
||||
+
|
||||
+struct rb_node {
|
||||
+ unsigned long __rb_parent_color;
|
||||
+ struct rb_node *rb_right;
|
||||
+ struct rb_node *rb_left;
|
||||
+} __attribute__((aligned(sizeof(long))));
|
||||
+/* The alignment might seem pointless, but allegedly CRIS needs it */
|
||||
+
|
||||
+struct rb_root {
|
||||
+ struct rb_node *rb_node;
|
||||
+};
|
||||
+
|
||||
+/*
|
||||
+ * Leftmost-cached rbtrees.
|
||||
+ *
|
||||
+ * We do not cache the rightmost node based on footprint
|
||||
+ * size vs number of potential users that could benefit
|
||||
+ * from O(1) rb_last(). Just not worth it, users that want
|
||||
+ * this feature can always implement the logic explicitly.
|
||||
+ * Furthermore, users that want to cache both pointers may
|
||||
+ * find it a bit asymmetric, but that's ok.
|
||||
+ */
|
||||
+struct rb_root_cached {
|
||||
+ struct rb_root rb_root;
|
||||
+ struct rb_node *rb_leftmost;
|
||||
+};
|
||||
+
|
||||
+#endif
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -14,7 +14,7 @@
|
||||
#define __LINUX_RT_MUTEX_H
|
||||
|
||||
#include <linux/linkage.h>
|
||||
-#include <linux/rbtree.h>
|
||||
+#include <linux/rbtree_type.h>
|
||||
#include <linux/spinlock_types_raw.h>
|
||||
|
||||
extern int max_lock_depth; /* for sysctl */
|
||||
@@ -1,6 +1,6 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 16:14:22 +0200
|
||||
Subject: rtmutex: Provide rt_mutex_slowlock_locked()
|
||||
Subject: [PATCH 12/22] locking/rtmutex: Provide rt_mutex_slowlock_locked()
|
||||
|
||||
This is the inner-part of rt_mutex_slowlock(), required for rwsem-rt.
|
||||
|
||||
@@ -13,7 +13,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1243,35 +1243,16 @@ static void rt_mutex_handle_deadlock(int
|
||||
@@ -1234,35 +1234,16 @@ static void rt_mutex_handle_deadlock(int
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
set_current_state(state);
|
||||
|
||||
@@ -1279,16 +1260,16 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
@@ -1270,16 +1251,16 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
if (unlikely(timeout))
|
||||
hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
|
||||
|
||||
@@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1296,6 +1277,34 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
@@ -1287,6 +1268,34 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
* unconditionally. We might have to fix that up.
|
||||
*/
|
||||
fixup_rt_mutex_waiters(lock);
|
||||
@@ -121,7 +121,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* This is the control structure for tasks blocked on a rt_mutex,
|
||||
@@ -159,6 +160,12 @@ extern bool __rt_mutex_futex_unlock(stru
|
||||
@@ -153,6 +154,12 @@ extern bool __rt_mutex_futex_unlock(stru
|
||||
struct wake_q_head *wqh);
|
||||
|
||||
extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
|
||||
@@ -1,20 +1,20 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 16:36:39 +0200
|
||||
Subject: rtmutex: export lockdep-less version of rt_mutex's lock,
|
||||
trylock and unlock
|
||||
Subject: [PATCH 13/22] locking/rtmutex: export lockdep-less version of
|
||||
rt_mutex's lock, trylock and unlock
|
||||
|
||||
Required for lock implementation on top of rtmutex.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/locking/rtmutex.c | 59 ++++++++++++++++++++++++++--------------
|
||||
kernel/locking/rtmutex.c | 54 ++++++++++++++++++++++++++++------------
|
||||
kernel/locking/rtmutex_common.h | 3 ++
|
||||
2 files changed, 42 insertions(+), 20 deletions(-)
|
||||
2 files changed, 41 insertions(+), 16 deletions(-)
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1493,12 +1493,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc
|
||||
@@ -1469,12 +1469,33 @@ rt_mutex_fastunlock(struct rt_mutex *loc
|
||||
rt_mutex_postunlock(&wake_q);
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
- rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
|
||||
+ ret = __rt_mutex_lock_state(lock, state);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
@@ -50,7 +50,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
@@ -1539,16 +1560,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
|
||||
@@ -1515,16 +1536,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
|
||||
*/
|
||||
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
|
||||
{
|
||||
@@ -61,31 +61,16 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
- mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
- ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
|
||||
- if (ret)
|
||||
- mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
- mutex_release(&lock->dep_map, _RET_IP_);
|
||||
-
|
||||
- return ret;
|
||||
+ return rt_mutex_lock_state(lock, 0, TASK_INTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
|
||||
|
||||
@@ -1574,13 +1586,10 @@ int __sched __rt_mutex_futex_trylock(str
|
||||
* Returns:
|
||||
* 0 on success
|
||||
* -EINTR when interrupted by a signal
|
||||
- * -EDEADLK when the lock would deadlock (when deadlock detection is on)
|
||||
*/
|
||||
int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
|
||||
{
|
||||
- might_sleep();
|
||||
-
|
||||
- return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock);
|
||||
+ return rt_mutex_lock_state(lock, 0, TASK_KILLABLE);
|
||||
@@ -1541,6 +1553,14 @@ int __sched __rt_mutex_futex_trylock(str
|
||||
return __rt_mutex_slowtrylock(lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
|
||||
|
||||
@@ -1615,6 +1624,14 @@ rt_mutex_timed_lock(struct rt_mutex *loc
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
|
||||
|
||||
+int __sched __rt_mutex_trylock(struct rt_mutex *lock)
|
||||
+{
|
||||
@@ -98,7 +83,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/**
|
||||
* rt_mutex_trylock - try to lock a rt_mutex
|
||||
*
|
||||
@@ -1630,10 +1647,7 @@ int __sched rt_mutex_trylock(struct rt_m
|
||||
@@ -1556,10 +1576,7 @@ int __sched rt_mutex_trylock(struct rt_m
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -110,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
if (ret)
|
||||
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
|
||||
@@ -1641,6 +1655,11 @@ int __sched rt_mutex_trylock(struct rt_m
|
||||
@@ -1567,6 +1584,11 @@ int __sched rt_mutex_trylock(struct rt_m
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
|
||||
|
||||
@@ -124,7 +109,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
*
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -162,6 +162,9 @@ extern bool __rt_mutex_futex_unlock(stru
|
||||
@@ -156,6 +156,9 @@ extern bool __rt_mutex_futex_unlock(stru
|
||||
extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
|
||||
/* RW semaphore special interface */
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
Subject: sched: Add saved_state for tasks blocked on sleeping locks
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sat, 25 Jun 2011 09:21:04 +0200
|
||||
Subject: [PATCH 14/22] sched: Add saved_state for tasks blocked on sleeping
|
||||
locks
|
||||
|
||||
Spinlocks are state preserving in !RT. RT changes the state when a
|
||||
task gets blocked on a lock. So we need to remember the state before
|
||||
@@ -11,13 +12,13 @@ sleep is done, the saved state is restored.
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/sched.h | 3 +++
|
||||
kernel/sched/core.c | 42 +++++++++++++++++++++++++++++++++++++++---
|
||||
kernel/sched/core.c | 34 ++++++++++++++++++++++++++++++++--
|
||||
kernel/sched/sched.h | 1 +
|
||||
3 files changed, 43 insertions(+), 3 deletions(-)
|
||||
3 files changed, 36 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -631,6 +631,8 @@ struct task_struct {
|
||||
@@ -655,6 +655,8 @@ struct task_struct {
|
||||
#endif
|
||||
/* -1 unrunnable, 0 runnable, >0 stopped: */
|
||||
volatile long state;
|
||||
@@ -26,7 +27,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
/*
|
||||
* This begins the randomizable portion of task_struct. Only
|
||||
@@ -1679,6 +1681,7 @@ extern struct task_struct *find_get_task
|
||||
@@ -1777,6 +1779,7 @@ extern struct task_struct *find_get_task
|
||||
|
||||
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
|
||||
extern int wake_up_process(struct task_struct *tsk);
|
||||
@@ -36,30 +37,20 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
#ifdef CONFIG_SMP
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -2524,6 +2524,8 @@ try_to_wake_up(struct task_struct *p, un
|
||||
@@ -3316,7 +3316,7 @@ try_to_wake_up(struct task_struct *p, un
|
||||
int cpu, success = 0;
|
||||
|
||||
preempt_disable();
|
||||
+
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
if (p == current) {
|
||||
- if (p == current) {
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && p == current) {
|
||||
/*
|
||||
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
|
||||
@@ -2546,7 +2548,7 @@ try_to_wake_up(struct task_struct *p, un
|
||||
trace_sched_wakeup(p);
|
||||
goto out;
|
||||
}
|
||||
-
|
||||
+#endif
|
||||
/*
|
||||
* If we are going to wake up a thread waiting for CONDITION we
|
||||
* need to ensure that CONDITION=1 done by the caller can not be
|
||||
@@ -2555,8 +2557,27 @@ try_to_wake_up(struct task_struct *p, un
|
||||
* == smp_processor_id()'. Together this means we can special
|
||||
@@ -3346,8 +3346,26 @@ try_to_wake_up(struct task_struct *p, un
|
||||
*/
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
smp_mb__after_spinlock();
|
||||
- if (!(p->state & state))
|
||||
- goto unlock;
|
||||
+ if (!(p->state & state)) {
|
||||
+ /*
|
||||
+ * The task might be running due to a spinlock sleeper
|
||||
@@ -72,8 +63,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
+ success = 1;
|
||||
+ }
|
||||
+ }
|
||||
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
+ goto out_nostat;
|
||||
goto unlock;
|
||||
+ }
|
||||
+ /*
|
||||
+ * If this is a regular wakeup, then we can unconditionally
|
||||
@@ -84,20 +74,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
trace_sched_waking(p);
|
||||
|
||||
@@ -2648,9 +2669,12 @@ try_to_wake_up(struct task_struct *p, un
|
||||
ttwu_queue(p, cpu, wake_flags);
|
||||
unlock:
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
out:
|
||||
+#endif
|
||||
if (success)
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
+out_nostat:
|
||||
preempt_enable();
|
||||
|
||||
return success;
|
||||
@@ -2673,6 +2697,18 @@ int wake_up_process(struct task_struct *
|
||||
@@ -3536,6 +3554,18 @@ int wake_up_process(struct task_struct *
|
||||
}
|
||||
EXPORT_SYMBOL(wake_up_process);
|
||||
|
||||
@@ -118,11 +95,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
return try_to_wake_up(p, state, 0);
|
||||
--- a/kernel/sched/sched.h
|
||||
+++ b/kernel/sched/sched.h
|
||||
@@ -1644,6 +1644,7 @@ static inline int task_on_rq_migrating(s
|
||||
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
|
||||
#define WF_FORK 0x02 /* Child wakeup after fork */
|
||||
#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
|
||||
+#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */
|
||||
@@ -1751,6 +1751,7 @@ static inline int task_on_rq_migrating(s
|
||||
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
|
||||
#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
|
||||
#define WF_ON_CPU 0x40 /* Wakee is on_cpu */
|
||||
+#define WF_LOCK_SLEEPER 0x80 /* Wakeup spinlock "sleeper" */
|
||||
|
||||
/*
|
||||
* To aid in avoiding the subversion of "niceness" due to uneven distribution
|
||||
#ifdef CONFIG_SMP
|
||||
static_assert(WF_EXEC == SD_BALANCE_EXEC);
|
||||
@@ -1,28 +1,29 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:11:19 +0200
|
||||
Subject: rtmutex: add sleeping lock implementation
|
||||
Subject: [PATCH 15/22] locking/rtmutex: add sleeping lock implementation
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/kernel.h | 5
|
||||
include/linux/rtmutex.h | 21 +
|
||||
include/linux/sched.h | 8
|
||||
include/linux/preempt.h | 4
|
||||
include/linux/rtmutex.h | 19 +
|
||||
include/linux/sched.h | 7
|
||||
include/linux/sched/wake_q.h | 13 +
|
||||
include/linux/spinlock_rt.h | 156 +++++++++++++
|
||||
include/linux/spinlock_types_rt.h | 48 ++++
|
||||
include/linux/spinlock_rt.h | 155 +++++++++++++
|
||||
include/linux/spinlock_types_rt.h | 38 +++
|
||||
kernel/fork.c | 1
|
||||
kernel/futex.c | 11
|
||||
kernel/locking/rtmutex.c | 436 ++++++++++++++++++++++++++++++++++----
|
||||
kernel/futex.c | 10
|
||||
kernel/locking/rtmutex.c | 444 ++++++++++++++++++++++++++++++++++----
|
||||
kernel/locking/rtmutex_common.h | 14 -
|
||||
kernel/sched/core.c | 39 ++-
|
||||
11 files changed, 694 insertions(+), 58 deletions(-)
|
||||
12 files changed, 694 insertions(+), 55 deletions(-)
|
||||
create mode 100644 include/linux/spinlock_rt.h
|
||||
create mode 100644 include/linux/spinlock_types_rt.h
|
||||
|
||||
--- a/include/linux/kernel.h
|
||||
+++ b/include/linux/kernel.h
|
||||
@@ -227,6 +227,10 @@ extern void __cant_sleep(const char *fil
|
||||
@@ -107,6 +107,10 @@ extern void __cant_migrate(const char *f
|
||||
*/
|
||||
# define might_sleep() \
|
||||
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
|
||||
@@ -33,23 +34,31 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/**
|
||||
* cant_sleep - annotation for functions that cannot sleep
|
||||
*
|
||||
@@ -258,6 +262,7 @@ extern void __cant_sleep(const char *fil
|
||||
@@ -150,6 +154,7 @@ extern void __cant_migrate(const char *f
|
||||
static inline void __might_sleep(const char *file, int line,
|
||||
int preempt_offset) { }
|
||||
# define might_sleep() do { might_resched(); } while (0)
|
||||
+# define might_sleep_no_state_check() do { might_resched(); } while (0)
|
||||
# define cant_sleep() do { } while (0)
|
||||
# define cant_migrate() do { } while (0)
|
||||
# define sched_annotate_sleep() do { } while (0)
|
||||
# define non_block_start() do { } while (0)
|
||||
--- a/include/linux/preempt.h
|
||||
+++ b/include/linux/preempt.h
|
||||
@@ -121,7 +121,11 @@
|
||||
/*
|
||||
* The preempt_count offset after spin_lock()
|
||||
*/
|
||||
+#if !defined(CONFIG_PREEMPT_RT)
|
||||
#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
|
||||
+#else
|
||||
+#define PREEMPT_LOCK_OFFSET 0
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* The preempt_count offset needed for things like:
|
||||
--- a/include/linux/rtmutex.h
|
||||
+++ b/include/linux/rtmutex.h
|
||||
@@ -14,11 +14,15 @@
|
||||
#define __LINUX_RT_MUTEX_H
|
||||
|
||||
#include <linux/linkage.h>
|
||||
-#include <linux/rbtree.h>
|
||||
#include <linux/spinlock_types_raw.h>
|
||||
+#include <linux/rbtree.h>
|
||||
@@ -19,6 +19,10 @@
|
||||
|
||||
extern int max_lock_depth; /* for sysctl */
|
||||
|
||||
@@ -60,46 +69,40 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/**
|
||||
* The rt_mutex structure
|
||||
*
|
||||
@@ -31,8 +35,8 @@ struct rt_mutex {
|
||||
@@ -31,6 +35,7 @@ struct rt_mutex {
|
||||
raw_spinlock_t wait_lock;
|
||||
struct rb_root_cached waiters;
|
||||
struct task_struct *owner;
|
||||
-#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
int save_state;
|
||||
+#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
const char *name, *file;
|
||||
int line;
|
||||
void *magic;
|
||||
@@ -82,16 +86,23 @@ do { \
|
||||
+ int save_state;
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
#endif
|
||||
@@ -67,11 +72,19 @@ do { \
|
||||
#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
|
||||
#endif
|
||||
|
||||
-#define __RT_MUTEX_INITIALIZER(mutexname) \
|
||||
- { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
|
||||
+#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
|
||||
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
|
||||
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \
|
||||
, .waiters = RB_ROOT_CACHED \
|
||||
, .owner = NULL \
|
||||
__DEBUG_RT_MUTEX_INITIALIZER(mutexname) \
|
||||
- __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)}
|
||||
+ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
|
||||
+
|
||||
+#define __RT_MUTEX_INITIALIZER(mutexname) \
|
||||
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) }
|
||||
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
|
||||
+ , .save_state = 0 }
|
||||
+
|
||||
+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
|
||||
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
|
||||
+ , .save_state = 1 }
|
||||
|
||||
#define DEFINE_RT_MUTEX(mutexname) \
|
||||
struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)
|
||||
|
||||
+#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \
|
||||
+ { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \
|
||||
+ , .save_state = 1 }
|
||||
+
|
||||
/**
|
||||
* rt_mutex_is_locked - is the mutex locked
|
||||
* @lock: the mutex to be queried
|
||||
--- a/include/linux/sched.h
|
||||
+++ b/include/linux/sched.h
|
||||
@@ -140,6 +140,9 @@ struct task_group;
|
||||
@@ -141,6 +141,9 @@ struct io_uring_task;
|
||||
smp_store_mb(current->state, (state_value)); \
|
||||
} while (0)
|
||||
|
||||
@@ -109,15 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#define set_special_state(state_value) \
|
||||
do { \
|
||||
unsigned long flags; /* may shadow */ \
|
||||
@@ -149,6 +152,7 @@ struct task_group;
|
||||
current->state = (state_value); \
|
||||
raw_spin_unlock_irqrestore(&current->pi_lock, flags); \
|
||||
} while (0)
|
||||
+
|
||||
#else
|
||||
/*
|
||||
* set_current_state() includes a barrier so that the write of current->state
|
||||
@@ -193,6 +197,9 @@ struct task_group;
|
||||
@@ -194,6 +197,9 @@ struct io_uring_task;
|
||||
#define set_current_state(state_value) \
|
||||
smp_store_mb(current->state, (state_value))
|
||||
|
||||
@@ -127,7 +122,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* set_special_state() should be used for those states when the blocking task
|
||||
* can not use the regular condition based wait-loop. In that case we must
|
||||
@@ -950,6 +957,7 @@ struct task_struct {
|
||||
@@ -1015,6 +1021,7 @@ struct task_struct {
|
||||
raw_spinlock_t pi_lock;
|
||||
|
||||
struct wake_q_node wake_q;
|
||||
@@ -158,7 +153,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#endif /* _LINUX_SCHED_WAKE_Q_H */
|
||||
--- /dev/null
|
||||
+++ b/include/linux/spinlock_rt.h
|
||||
@@ -0,0 +1,156 @@
|
||||
@@ -0,0 +1,155 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef __LINUX_SPINLOCK_RT_H
|
||||
+#define __LINUX_SPINLOCK_RT_H
|
||||
+
|
||||
@@ -180,10 +176,10 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+} while (0)
|
||||
+
|
||||
+extern void __lockfunc rt_spin_lock(spinlock_t *lock);
|
||||
+extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock);
|
||||
+extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass);
|
||||
+extern void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
|
||||
+extern void __lockfunc rt_spin_unlock(spinlock_t *lock);
|
||||
+extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock);
|
||||
+extern void __lockfunc rt_spin_lock_unlock(spinlock_t *lock);
|
||||
+extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags);
|
||||
+extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock);
|
||||
+extern int __lockfunc rt_spin_trylock(spinlock_t *lock);
|
||||
@@ -229,6 +225,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ rt_spin_lock_nested(lock, subclass); \
|
||||
+ } while (0)
|
||||
+
|
||||
+# define spin_lock_nest_lock(lock, subclass) \
|
||||
+ do { \
|
||||
+ typecheck(struct lockdep_map *, &(subclass)->dep_map); \
|
||||
+ rt_spin_lock_nest_lock(lock, &(subclass)->dep_map); \
|
||||
+ } while (0)
|
||||
+
|
||||
+# define spin_lock_irqsave_nested(lock, flags, subclass) \
|
||||
+ do { \
|
||||
+ typecheck(unsigned long, flags); \
|
||||
@@ -236,14 +238,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ rt_spin_lock_nested(lock, subclass); \
|
||||
+ } while (0)
|
||||
+#else
|
||||
+# define spin_lock_nested(lock, subclass) spin_lock(lock)
|
||||
+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock)
|
||||
+# define spin_lock_nested(lock, subclass) spin_lock(((void)(subclass), (lock)))
|
||||
+# define spin_lock_nest_lock(lock, subclass) spin_lock(((void)(subclass), (lock)))
|
||||
+# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(((void)(subclass), (lock)))
|
||||
+
|
||||
+# define spin_lock_irqsave_nested(lock, flags, subclass) \
|
||||
+ do { \
|
||||
+ typecheck(unsigned long, flags); \
|
||||
+ flags = 0; \
|
||||
+ spin_lock(lock); \
|
||||
+ spin_lock(((void)(subclass), (lock))); \
|
||||
+ } while (0)
|
||||
+#endif
|
||||
+
|
||||
@@ -254,20 +257,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ spin_lock(lock); \
|
||||
+ } while (0)
|
||||
+
|
||||
+static inline unsigned long spin_lock_trace_flags(spinlock_t *lock)
|
||||
+{
|
||||
+ unsigned long flags = 0;
|
||||
+#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
+ flags = rt_spin_lock_trace_flags(lock);
|
||||
+#else
|
||||
+ spin_lock(lock); /* lock_local */
|
||||
+#endif
|
||||
+ return flags;
|
||||
+}
|
||||
+
|
||||
+/* FIXME: we need rt_spin_lock_nest_lock */
|
||||
+#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0)
|
||||
+
|
||||
+#define spin_unlock(lock) rt_spin_unlock(lock)
|
||||
+
|
||||
+#define spin_unlock_bh(lock) \
|
||||
@@ -288,10 +277,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock))
|
||||
+#define spin_trylock_irq(lock) spin_trylock(lock)
|
||||
+
|
||||
+#define spin_trylock_irqsave(lock, flags) \
|
||||
+ rt_spin_trylock_irqsave(lock, &(flags))
|
||||
+
|
||||
+#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock)
|
||||
+#define spin_trylock_irqsave(lock, flags) \
|
||||
+({ \
|
||||
+ int __locked; \
|
||||
+ \
|
||||
+ typecheck(unsigned long, flags); \
|
||||
+ flags = 0; \
|
||||
+ __locked = spin_trylock(lock); \
|
||||
+ __locked; \
|
||||
+})
|
||||
+
|
||||
+#ifdef CONFIG_GENERIC_LOCKBREAK
|
||||
+# define spin_is_contended(lock) ((lock)->break_lock)
|
||||
@@ -317,7 +311,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
--- /dev/null
|
||||
+++ b/include/linux/spinlock_types_rt.h
|
||||
@@ -0,0 +1,48 @@
|
||||
@@ -0,0 +1,38 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef __LINUX_SPINLOCK_TYPES_RT_H
|
||||
+#define __LINUX_SPINLOCK_TYPES_RT_H
|
||||
+
|
||||
@@ -338,22 +333,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
+} spinlock_t;
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
+# define __RT_SPIN_INITIALIZER(name) \
|
||||
+#define __RT_SPIN_INITIALIZER(name) \
|
||||
+ { \
|
||||
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
|
||||
+ .save_state = 1, \
|
||||
+ .file = __FILE__, \
|
||||
+ .line = __LINE__ , \
|
||||
+ }
|
||||
+#else
|
||||
+# define __RT_SPIN_INITIALIZER(name) \
|
||||
+ { \
|
||||
+ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \
|
||||
+ .save_state = 1, \
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
+.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock)
|
||||
+*/
|
||||
@@ -368,7 +352,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -950,6 +950,7 @@ static struct task_struct *dup_task_stru
|
||||
@@ -927,6 +927,7 @@ static struct task_struct *dup_task_stru
|
||||
tsk->splice_pipe = NULL;
|
||||
tsk->task_frag.page = NULL;
|
||||
tsk->wake_q.next = NULL;
|
||||
@@ -378,7 +362,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
@@ -1573,6 +1573,7 @@ static int wake_futex_pi(u32 __user *uad
|
||||
@@ -1497,6 +1497,7 @@ static int wake_futex_pi(u32 __user *uad
|
||||
struct task_struct *new_owner;
|
||||
bool postunlock = false;
|
||||
DEFINE_WAKE_Q(wake_q);
|
||||
@@ -386,14 +370,15 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
int ret = 0;
|
||||
|
||||
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
|
||||
@@ -1632,13 +1633,13 @@ static int wake_futex_pi(u32 __user *uad
|
||||
pi_state->owner = new_owner;
|
||||
raw_spin_unlock(&new_owner->pi_lock);
|
||||
@@ -1546,14 +1547,15 @@ static int wake_futex_pi(u32 __user *uad
|
||||
* not fail.
|
||||
*/
|
||||
pi_state_update_owner(pi_state, new_owner);
|
||||
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
|
||||
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
|
||||
+ &wake_sleeper_q);
|
||||
}
|
||||
|
||||
- postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
|
||||
-
|
||||
+ postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q,
|
||||
+ &wake_sleeper_q);
|
||||
out_unlock:
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
|
||||
@@ -403,7 +388,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -2980,7 +2981,7 @@ static int futex_lock_pi(u32 __user *uad
|
||||
@@ -2857,7 +2859,7 @@ static int futex_lock_pi(u32 __user *uad
|
||||
goto no_block;
|
||||
}
|
||||
|
||||
@@ -412,7 +397,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
|
||||
@@ -3348,7 +3349,7 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
@@ -3203,7 +3205,7 @@ static int futex_wait_requeue_pi(u32 __u
|
||||
* The waiter is allocated on our stack, manipulated by the requeue
|
||||
* code while we sleep on uaddr.
|
||||
*/
|
||||
@@ -435,7 +420,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
*
|
||||
* See Documentation/locking/rt-mutex-design.rst for details.
|
||||
*/
|
||||
@@ -235,7 +240,7 @@ static inline bool unlock_rt_mutex_safe(
|
||||
@@ -233,7 +238,7 @@ static inline bool unlock_rt_mutex_safe(
|
||||
* Only use with rt_mutex_waiter_{less,equal}()
|
||||
*/
|
||||
#define task_to_waiter(p) \
|
||||
@@ -444,7 +429,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
static inline int
|
||||
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
|
||||
@@ -275,6 +280,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa
|
||||
@@ -273,6 +278,27 @@ rt_mutex_waiter_equal(struct rt_mutex_wa
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -472,7 +457,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
static void
|
||||
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
|
||||
{
|
||||
@@ -379,6 +405,14 @@ static bool rt_mutex_cond_detect_deadloc
|
||||
@@ -377,6 +403,14 @@ static bool rt_mutex_cond_detect_deadloc
|
||||
return debug_rt_mutex_detect_deadlock(waiter, chwalk);
|
||||
}
|
||||
|
||||
@@ -487,7 +472,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Max number of times we'll walk the boosting chain:
|
||||
*/
|
||||
@@ -703,13 +737,16 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
@@ -700,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
* follow here. This is the end of the chain we are walking.
|
||||
*/
|
||||
if (!rt_mutex_owner(lock)) {
|
||||
@@ -506,7 +491,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
raw_spin_unlock_irq(&lock->wait_lock);
|
||||
return 0;
|
||||
}
|
||||
@@ -810,9 +847,11 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
@@ -807,9 +844,11 @@ static int rt_mutex_adjust_prio_chain(st
|
||||
* @task: The task which wants to acquire the lock
|
||||
* @waiter: The waiter that is queued to the lock's wait tree if the
|
||||
* callsite called task_blocked_on_lock(), otherwise NULL
|
||||
@@ -520,7 +505,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
lockdep_assert_held(&lock->wait_lock);
|
||||
|
||||
@@ -848,12 +887,11 @@ static int try_to_take_rt_mutex(struct r
|
||||
@@ -845,12 +884,11 @@ static int try_to_take_rt_mutex(struct r
|
||||
*/
|
||||
if (waiter) {
|
||||
/*
|
||||
@@ -536,7 +521,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* We can acquire the lock. Remove the waiter from the
|
||||
* lock waiters tree.
|
||||
@@ -871,14 +909,12 @@ static int try_to_take_rt_mutex(struct r
|
||||
@@ -868,14 +906,12 @@ static int try_to_take_rt_mutex(struct r
|
||||
*/
|
||||
if (rt_mutex_has_waiters(lock)) {
|
||||
/*
|
||||
@@ -555,7 +540,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* The current top waiter stays enqueued. We
|
||||
* don't have to change anything in the lock
|
||||
@@ -925,6 +961,296 @@ static int try_to_take_rt_mutex(struct r
|
||||
@@ -922,6 +958,289 @@ static int try_to_take_rt_mutex(struct r
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -666,8 +651,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
||||
+
|
||||
+ debug_rt_mutex_print_deadlock(waiter);
|
||||
+
|
||||
+ if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
|
||||
+ schedule();
|
||||
+
|
||||
@@ -736,9 +719,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+void __lockfunc rt_spin_lock(spinlock_t *lock)
|
||||
+{
|
||||
+ migrate_disable();
|
||||
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
|
||||
+ migrate_disable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_lock);
|
||||
+
|
||||
@@ -750,19 +733,28 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass)
|
||||
+{
|
||||
+ migrate_disable();
|
||||
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
|
||||
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
|
||||
+ migrate_disable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_lock_nested);
|
||||
+
|
||||
+void __lockfunc rt_spin_lock_nest_lock(spinlock_t *lock,
|
||||
+ struct lockdep_map *nest_lock)
|
||||
+{
|
||||
+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
|
||||
+ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock);
|
||||
+ migrate_disable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_lock_nest_lock);
|
||||
+#endif
|
||||
+
|
||||
+void __lockfunc rt_spin_unlock(spinlock_t *lock)
|
||||
+{
|
||||
+ /* NOTE: we always pass in '1' for nested, for simplicity */
|
||||
+ spin_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
|
||||
+ spin_release(&lock->dep_map, _RET_IP_);
|
||||
+ migrate_enable();
|
||||
+ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_unlock);
|
||||
+
|
||||
@@ -777,23 +769,22 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ * (like raw spinlocks do), we lock and unlock, to force the kernel to
|
||||
+ * schedule if there's contention:
|
||||
+ */
|
||||
+void __lockfunc rt_spin_unlock_wait(spinlock_t *lock)
|
||||
+void __lockfunc rt_spin_lock_unlock(spinlock_t *lock)
|
||||
+{
|
||||
+ spin_lock(lock);
|
||||
+ spin_unlock(lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_unlock_wait);
|
||||
+EXPORT_SYMBOL(rt_spin_lock_unlock);
|
||||
+
|
||||
+int __lockfunc rt_spin_trylock(spinlock_t *lock)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ migrate_disable();
|
||||
+ ret = __rt_mutex_trylock(&lock->lock);
|
||||
+ if (ret)
|
||||
+ if (ret) {
|
||||
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
+ else
|
||||
+ migrate_enable();
|
||||
+ migrate_disable();
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_trylock);
|
||||
@@ -805,27 +796,14 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ local_bh_disable();
|
||||
+ ret = __rt_mutex_trylock(&lock->lock);
|
||||
+ if (ret) {
|
||||
+ migrate_disable();
|
||||
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
+ } else
|
||||
+ migrate_disable();
|
||||
+ } else {
|
||||
+ local_bh_enable();
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_trylock_bh);
|
||||
+
|
||||
+int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ *flags = 0;
|
||||
+ ret = __rt_mutex_trylock(&lock->lock);
|
||||
+ if (ret) {
|
||||
+ migrate_disable();
|
||||
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_spin_trylock_irqsave);
|
||||
+EXPORT_SYMBOL(rt_spin_trylock_bh);
|
||||
+
|
||||
+void
|
||||
+__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key)
|
||||
@@ -852,7 +830,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Task blocks on lock.
|
||||
*
|
||||
@@ -1038,6 +1364,7 @@ static int task_blocks_on_rt_mutex(struc
|
||||
@@ -1035,6 +1354,7 @@ static int task_blocks_on_rt_mutex(struc
|
||||
* Called with lock->wait_lock held and interrupts disabled.
|
||||
*/
|
||||
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
|
||||
@@ -860,7 +838,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
struct rt_mutex *lock)
|
||||
{
|
||||
struct rt_mutex_waiter *waiter;
|
||||
@@ -1077,7 +1404,10 @@ static void mark_wakeup_next_waiter(stru
|
||||
@@ -1074,7 +1394,10 @@ static void mark_wakeup_next_waiter(stru
|
||||
* Pairs with preempt_enable() in rt_mutex_postunlock();
|
||||
*/
|
||||
preempt_disable();
|
||||
@@ -872,7 +850,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
raw_spin_unlock(¤t->pi_lock);
|
||||
}
|
||||
|
||||
@@ -1161,21 +1491,22 @@ void rt_mutex_adjust_pi(struct task_stru
|
||||
@@ -1158,21 +1481,22 @@ void rt_mutex_adjust_pi(struct task_stru
|
||||
return;
|
||||
}
|
||||
next_lock = waiter->lock;
|
||||
@@ -897,7 +875,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1292,7 +1623,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
@@ -1283,7 +1607,7 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
@@ -906,7 +884,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* Technically we could use raw_spin_[un]lock_irq() here, but this can
|
||||
@@ -1365,7 +1696,8 @@ static inline int rt_mutex_slowtrylock(s
|
||||
@@ -1356,7 +1680,8 @@ static inline int rt_mutex_slowtrylock(s
|
||||
* Return whether the current task needs to call rt_mutex_postunlock().
|
||||
*/
|
||||
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
|
||||
@@ -916,7 +894,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@@ -1419,7 +1751,7 @@ static bool __sched rt_mutex_slowunlock(
|
||||
@@ -1410,7 +1735,7 @@ static bool __sched rt_mutex_slowunlock(
|
||||
*
|
||||
* Queue the next waiter for wakeup once we release the wait_lock.
|
||||
*/
|
||||
@@ -925,7 +903,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
||||
|
||||
return true; /* call rt_mutex_postunlock() */
|
||||
@@ -1471,9 +1803,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
|
||||
@@ -1447,9 +1772,11 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
|
||||
/*
|
||||
* Performs the wakeup of the the top-waiter and re-enables preemption.
|
||||
*/
|
||||
@@ -938,7 +916,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/* Pairs with preempt_disable() in rt_mutex_slowunlock() */
|
||||
preempt_enable();
|
||||
@@ -1482,15 +1816,17 @@ void rt_mutex_postunlock(struct wake_q_h
|
||||
@@ -1458,15 +1785,17 @@ void rt_mutex_postunlock(struct wake_q_h
|
||||
static inline void
|
||||
rt_mutex_fastunlock(struct rt_mutex *lock,
|
||||
bool (*slowfn)(struct rt_mutex *lock,
|
||||
@@ -959,10 +937,10 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
|
||||
@@ -1668,16 +2004,13 @@ void __sched __rt_mutex_unlock(struct rt
|
||||
@@ -1597,16 +1926,13 @@ void __sched __rt_mutex_unlock(struct rt
|
||||
void __sched rt_mutex_unlock(struct rt_mutex *lock)
|
||||
{
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
mutex_release(&lock->dep_map, _RET_IP_);
|
||||
- rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
|
||||
+ __rt_mutex_unlock(lock);
|
||||
}
|
||||
@@ -980,7 +958,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
lockdep_assert_held(&lock->wait_lock);
|
||||
|
||||
@@ -1694,23 +2027,35 @@ bool __sched __rt_mutex_futex_unlock(str
|
||||
@@ -1623,23 +1949,35 @@ bool __sched __rt_mutex_futex_unlock(str
|
||||
* avoid inversion prior to the wakeup. preempt_disable()
|
||||
* therein pairs with rt_mutex_postunlock().
|
||||
*/
|
||||
@@ -1019,7 +997,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1749,7 +2094,7 @@ void __rt_mutex_init(struct rt_mutex *lo
|
||||
@@ -1675,7 +2013,7 @@ void __rt_mutex_init(struct rt_mutex *lo
|
||||
if (name && key)
|
||||
debug_rt_mutex_init(lock, name, key);
|
||||
}
|
||||
@@ -1028,18 +1006,28 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/**
|
||||
* rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
|
||||
@@ -1944,6 +2289,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
||||
struct hrtimer_sleeper *to,
|
||||
struct rt_mutex_waiter *waiter)
|
||||
@@ -1695,6 +2033,14 @@ void rt_mutex_init_proxy_locked(struct r
|
||||
struct task_struct *proxy_owner)
|
||||
{
|
||||
+ struct task_struct *tsk = current;
|
||||
int ret;
|
||||
__rt_mutex_init(lock, NULL, NULL);
|
||||
+#ifdef CONFIG_DEBUG_SPINLOCK
|
||||
+ /*
|
||||
+ * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is
|
||||
+ * holding the ->wait_lock of the proxy_lock while unlocking a sleeping
|
||||
+ * lock.
|
||||
+ */
|
||||
+ raw_spin_lock_init(&lock->wait_lock);
|
||||
+#endif
|
||||
debug_rt_mutex_proxy_lock(lock, proxy_owner);
|
||||
rt_mutex_set_owner(lock, proxy_owner);
|
||||
}
|
||||
@@ -1717,6 +2063,26 @@ void rt_mutex_proxy_unlock(struct rt_mut
|
||||
rt_mutex_set_owner(lock, NULL);
|
||||
}
|
||||
|
||||
raw_spin_lock_irq(&lock->wait_lock);
|
||||
@@ -1955,6 +2301,24 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
||||
* have to fix that up.
|
||||
*/
|
||||
fixup_rt_mutex_waiters(lock);
|
||||
+static void fixup_rt_mutex_blocked(struct rt_mutex *lock)
|
||||
+{
|
||||
+ struct task_struct *tsk = current;
|
||||
+ /*
|
||||
+ * RT has a problem here when the wait got interrupted by a timeout
|
||||
+ * or a signal. task->pi_blocked_on is still set. The task must
|
||||
@@ -1052,35 +1040,54 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ * boosting chain of the rtmutex. That's correct because the task
|
||||
+ * is not longer blocked on it.
|
||||
+ */
|
||||
+ if (ret) {
|
||||
+ raw_spin_lock(&tsk->pi_lock);
|
||||
+ tsk->pi_blocked_on = NULL;
|
||||
+ raw_spin_unlock(&tsk->pi_lock);
|
||||
+ }
|
||||
+ raw_spin_lock(&tsk->pi_lock);
|
||||
+ tsk->pi_blocked_on = NULL;
|
||||
+ raw_spin_unlock(&tsk->pi_lock);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
|
||||
* @lock: the rt_mutex to take
|
||||
@@ -1789,6 +2155,9 @@ int __rt_mutex_start_proxy_lock(struct r
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
+ if (ret)
|
||||
+ fixup_rt_mutex_blocked(lock);
|
||||
+
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1878,6 +2247,9 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
||||
* have to fix that up.
|
||||
*/
|
||||
fixup_rt_mutex_waiters(lock);
|
||||
+ if (ret)
|
||||
+ fixup_rt_mutex_blocked(lock);
|
||||
+
|
||||
raw_spin_unlock_irq(&lock->wait_lock);
|
||||
|
||||
return ret;
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -30,6 +30,7 @@ struct rt_mutex_waiter {
|
||||
struct rb_node pi_tree_entry;
|
||||
@@ -31,6 +31,7 @@ struct rt_mutex_waiter {
|
||||
struct task_struct *task;
|
||||
struct rt_mutex *lock;
|
||||
int prio;
|
||||
+ bool savestate;
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
unsigned long ip;
|
||||
struct pid *deadlock_task_pid;
|
||||
@@ -139,7 +140,7 @@ extern void rt_mutex_init_proxy_locked(s
|
||||
u64 deadline;
|
||||
};
|
||||
|
||||
@@ -133,7 +134,7 @@ extern struct task_struct *rt_mutex_next
|
||||
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
|
||||
struct task_struct *proxy_owner);
|
||||
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
|
||||
-extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
|
||||
+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate);
|
||||
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
|
||||
struct rt_mutex_waiter *waiter,
|
||||
struct task_struct *task);
|
||||
@@ -157,9 +158,12 @@ extern int __rt_mutex_futex_trylock(stru
|
||||
@@ -151,9 +152,12 @@ extern int __rt_mutex_futex_trylock(stru
|
||||
|
||||
extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
|
||||
extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
|
||||
@@ -1095,7 +1102,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/* RW semaphore special interface */
|
||||
|
||||
extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
|
||||
@@ -169,6 +173,10 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
@@ -163,6 +167,10 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
struct hrtimer_sleeper *timeout,
|
||||
enum rtmutex_chainwalk chwalk,
|
||||
struct rt_mutex_waiter *waiter);
|
||||
@@ -1108,7 +1115,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
# include "rtmutex-debug.h"
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -414,9 +414,15 @@ static bool set_nr_if_polling(struct tas
|
||||
@@ -502,9 +502,15 @@ static bool set_nr_if_polling(struct tas
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1126,7 +1133,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* Atomically grab the task, if ->wake_q is !nil already it means
|
||||
@@ -452,7 +458,13 @@ static bool __wake_q_add(struct wake_q_h
|
||||
@@ -540,7 +546,13 @@ static bool __wake_q_add(struct wake_q_h
|
||||
*/
|
||||
void wake_q_add(struct wake_q_head *head, struct task_struct *task)
|
||||
{
|
||||
@@ -1141,7 +1148,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
get_task_struct(task);
|
||||
}
|
||||
|
||||
@@ -475,28 +487,39 @@ void wake_q_add(struct wake_q_head *head
|
||||
@@ -563,28 +575,39 @@ void wake_q_add(struct wake_q_head *head
|
||||
*/
|
||||
void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
|
||||
{
|
||||
@@ -1,10 +1,12 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed 02 Dec 2015 11:34:07 +0100
|
||||
Subject: rtmutex: trylock is okay on -RT
|
||||
Date: Wed, 2 Dec 2015 11:34:07 +0100
|
||||
Subject: [PATCH 16/22] locking/rtmutex: Allow rt_mutex_trylock() on PREEMPT_RT
|
||||
|
||||
non-RT kernel could deadlock on rt_mutex_trylock() in softirq context. On
|
||||
-RT we don't run softirqs in IRQ context but in thread context so it is
|
||||
not a issue here.
|
||||
Non PREEMPT_RT kernel can deadlock on rt_mutex_trylock() in softirq
|
||||
context.
|
||||
On PREEMPT_RT the softirq context is handled in thread context. This
|
||||
avoids the deadlock in the slow path and PI-boosting will be done on the
|
||||
correct thread.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
@@ -13,7 +15,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1962,7 +1962,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
|
||||
@@ -1884,7 +1884,11 @@ int __sched __rt_mutex_futex_trylock(str
|
||||
|
||||
int __sched __rt_mutex_trylock(struct rt_mutex *lock)
|
||||
{
|
||||
@@ -1,19 +1,21 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:17:03 +0200
|
||||
Subject: rtmutex: add mutex implementation based on rtmutex
|
||||
Subject: [PATCH 17/22] locking/rtmutex: add mutex implementation based on
|
||||
rtmutex
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/mutex_rt.h | 130 ++++++++++++++++++++++++++
|
||||
kernel/locking/mutex-rt.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 353 insertions(+)
|
||||
kernel/locking/mutex-rt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 354 insertions(+)
|
||||
create mode 100644 include/linux/mutex_rt.h
|
||||
create mode 100644 kernel/locking/mutex-rt.c
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/mutex_rt.h
|
||||
@@ -0,0 +1,130 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef __LINUX_MUTEX_RT_H
|
||||
+#define __LINUX_MUTEX_RT_H
|
||||
+
|
||||
@@ -44,7 +46,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key);
|
||||
+extern void __lockfunc _mutex_lock(struct mutex *lock);
|
||||
+extern void __lockfunc _mutex_lock_io(struct mutex *lock);
|
||||
+extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass);
|
||||
+extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock);
|
||||
+extern int __lockfunc _mutex_lock_killable(struct mutex *lock);
|
||||
@@ -61,7 +62,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#define mutex_lock_killable(l) _mutex_lock_killable(l)
|
||||
+#define mutex_trylock(l) _mutex_trylock(l)
|
||||
+#define mutex_unlock(l) _mutex_unlock(l)
|
||||
+#define mutex_lock_io(l) _mutex_lock_io(l);
|
||||
+#define mutex_lock_io(l) _mutex_lock_io_nested(l, 0);
|
||||
+
|
||||
+#define __mutex_owner(l) ((l)->lock.owner)
|
||||
+
|
||||
@@ -92,7 +93,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+# define mutex_lock_killable_nested(l, s) \
|
||||
+ _mutex_lock_killable(l)
|
||||
+# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
|
||||
+# define mutex_lock_io_nested(l, s) _mutex_lock_io(l)
|
||||
+# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s)
|
||||
+#endif
|
||||
+
|
||||
+# define mutex_init(mutex) \
|
||||
@@ -146,10 +147,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
--- /dev/null
|
||||
+++ b/kernel/locking/mutex-rt.c
|
||||
@@ -0,0 +1,223 @@
|
||||
@@ -0,0 +1,224 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+/*
|
||||
+ * kernel/rt.c
|
||||
+ *
|
||||
+ * Real-Time Preemption Support
|
||||
+ *
|
||||
+ * started by Ingo Molnar:
|
||||
@@ -215,6 +215,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#include <linux/fs.h>
|
||||
+#include <linux/futex.h>
|
||||
+#include <linux/hrtimer.h>
|
||||
+#include <linux/blkdev.h>
|
||||
+
|
||||
+#include "rtmutex_common.h"
|
||||
+
|
||||
@@ -235,55 +236,24 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+}
|
||||
+EXPORT_SYMBOL(__mutex_do_init);
|
||||
+
|
||||
+static int _mutex_lock_blk_flush(struct mutex *lock, int state)
|
||||
+{
|
||||
+ /*
|
||||
+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
|
||||
+ * late if one of the callbacks needs to acquire a sleeping lock.
|
||||
+ */
|
||||
+ if (blk_needs_flush_plug(current))
|
||||
+ blk_schedule_flush_plug(current);
|
||||
+ return __rt_mutex_lock_state(&lock->lock, state);
|
||||
+}
|
||||
+
|
||||
+void __lockfunc _mutex_lock(struct mutex *lock)
|
||||
+{
|
||||
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
|
||||
+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock);
|
||||
+
|
||||
+void __lockfunc _mutex_lock_io(struct mutex *lock)
|
||||
+{
|
||||
+ int token;
|
||||
+
|
||||
+ token = io_schedule_prepare();
|
||||
+ _mutex_lock(lock);
|
||||
+ io_schedule_finish(token);
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(_mutex_lock_io);
|
||||
+
|
||||
+int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_interruptible);
|
||||
+
|
||||
+int __lockfunc _mutex_lock_killable(struct mutex *lock)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_killable);
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
|
||||
+{
|
||||
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
|
||||
+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_nested);
|
||||
+
|
||||
+void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass)
|
||||
+{
|
||||
+ int token;
|
||||
@@ -297,10 +267,42 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+}
|
||||
+EXPORT_SYMBOL_GPL(_mutex_lock_io_nested);
|
||||
+
|
||||
+int __lockfunc _mutex_lock_interruptible(struct mutex *lock)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_interruptible);
|
||||
+
|
||||
+int __lockfunc _mutex_lock_killable(struct mutex *lock)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_killable);
|
||||
+
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass)
|
||||
+{
|
||||
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
|
||||
+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_nested);
|
||||
+
|
||||
+void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
|
||||
+{
|
||||
+ mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_);
|
||||
+ __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE);
|
||||
+ _mutex_lock_blk_flush(lock, TASK_UNINTERRUPTIBLE);
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_nest_lock);
|
||||
+
|
||||
@@ -309,9 +311,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_);
|
||||
+ ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE);
|
||||
+ ret = _mutex_lock_blk_flush(lock, TASK_INTERRUPTIBLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_interruptible_nested);
|
||||
@@ -321,9 +323,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ int ret;
|
||||
+
|
||||
+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
|
||||
+ ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE);
|
||||
+ ret = _mutex_lock_blk_flush(lock, TASK_KILLABLE);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_lock_killable_nested);
|
||||
@@ -342,7 +344,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+void __lockfunc _mutex_unlock(struct mutex *lock)
|
||||
+{
|
||||
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->dep_map, _RET_IP_);
|
||||
+ __rt_mutex_unlock(&lock->lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(_mutex_unlock);
|
||||
@@ -1,6 +1,7 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:28:34 +0200
|
||||
Subject: rtmutex: add rwsem implementation based on rtmutex
|
||||
Subject: [PATCH 18/22] locking/rtmutex: add rwsem implementation based on
|
||||
rtmutex
|
||||
|
||||
The RT specific R/W semaphore implementation restricts the number of readers
|
||||
to one because a writer cannot block on multiple readers and inherit its
|
||||
@@ -14,7 +15,7 @@ The single reader restricting is painful in various ways:
|
||||
- Progress blocker for drivers which are carefully crafted to avoid the
|
||||
potential reader/writer deadlock in mainline.
|
||||
|
||||
The analysis of the writer code pathes shows, that properly written RT tasks
|
||||
The analysis of the writer code paths shows, that properly written RT tasks
|
||||
should not take them. Syscalls like mmap(), file access which take mmap sem
|
||||
write locked have unbound latencies which are completely unrelated to mmap
|
||||
sem. Other R/W sem users like graphics drivers are not suitable for RT tasks
|
||||
@@ -41,15 +42,16 @@ the approach.
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rwsem-rt.h | 68 ++++++++++
|
||||
kernel/locking/rwsem-rt.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 361 insertions(+)
|
||||
include/linux/rwsem-rt.h | 70 ++++++++++
|
||||
kernel/locking/rwsem-rt.c | 318 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 388 insertions(+)
|
||||
create mode 100644 include/linux/rwsem-rt.h
|
||||
create mode 100644 kernel/locking/rwsem-rt.c
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/rwsem-rt.h
|
||||
@@ -0,0 +1,68 @@
|
||||
@@ -0,0 +1,70 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef _LINUX_RWSEM_RT_H
|
||||
+#define _LINUX_RWSEM_RT_H
|
||||
+
|
||||
@@ -108,6 +110,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+}
|
||||
+
|
||||
+extern void __down_read(struct rw_semaphore *sem);
|
||||
+extern int __down_read_interruptible(struct rw_semaphore *sem);
|
||||
+extern int __down_read_killable(struct rw_semaphore *sem);
|
||||
+extern int __down_read_trylock(struct rw_semaphore *sem);
|
||||
+extern void __down_write(struct rw_semaphore *sem);
|
||||
@@ -120,13 +123,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
--- /dev/null
|
||||
+++ b/kernel/locking/rwsem-rt.c
|
||||
@@ -0,0 +1,293 @@
|
||||
+/*
|
||||
+ */
|
||||
@@ -0,0 +1,318 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#include <linux/rwsem.h>
|
||||
+#include <linux/sched/debug.h>
|
||||
+#include <linux/sched/signal.h>
|
||||
+#include <linux/export.h>
|
||||
+#include <linux/blkdev.h>
|
||||
+
|
||||
+#include "rtmutex_common.h"
|
||||
+
|
||||
@@ -211,6 +214,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ if (__down_read_trylock(sem))
|
||||
+ return 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
|
||||
+ * late if one of the callbacks needs to acquire a sleeping lock.
|
||||
+ */
|
||||
+ if (blk_needs_flush_plug(current))
|
||||
+ blk_schedule_flush_plug(current);
|
||||
+
|
||||
+ might_sleep();
|
||||
+ raw_spin_lock_irq(&m->wait_lock);
|
||||
+ /*
|
||||
@@ -280,6 +290,17 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ WARN_ON_ONCE(ret);
|
||||
+}
|
||||
+
|
||||
+int __down_read_interruptible(struct rw_semaphore *sem)
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = __down_read_common(sem, TASK_INTERRUPTIBLE);
|
||||
+ if (likely(!ret))
|
||||
+ return ret;
|
||||
+ WARN_ONCE(ret != -EINTR, "Unexpected state: %d\n", ret);
|
||||
+ return -EINTR;
|
||||
+}
|
||||
+
|
||||
+int __down_read_killable(struct rw_semaphore *sem)
|
||||
+{
|
||||
+ int ret;
|
||||
@@ -333,6 +354,13 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ struct rt_mutex *m = &sem->rtmutex;
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ /*
|
||||
+ * Flush blk before ->pi_blocked_on is set. At schedule() time it is too
|
||||
+ * late if one of the callbacks needs to acquire a sleeping lock.
|
||||
+ */
|
||||
+ if (blk_needs_flush_plug(current))
|
||||
+ blk_schedule_flush_plug(current);
|
||||
+
|
||||
+ /* Take the rtmutex as a first step */
|
||||
+ if (__rt_mutex_lock_state(m, state))
|
||||
+ return -EINTR;
|
||||
@@ -1,23 +1,26 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:18:06 +0200
|
||||
Subject: rtmutex: add rwlock implementation based on rtmutex
|
||||
Subject: [PATCH 19/22] locking/rtmutex: add rwlock implementation based on
|
||||
rtmutex
|
||||
|
||||
The implementation is bias-based, similar to the rwsem implementation.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/rwlock_rt.h | 119 ++++++++++++
|
||||
include/linux/rwlock_types_rt.h | 55 +++++
|
||||
kernel/locking/rwlock-rt.c | 368 ++++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 542 insertions(+)
|
||||
include/linux/rwlock_rt.h | 109 +++++++++++++
|
||||
include/linux/rwlock_types_rt.h | 56 ++++++
|
||||
kernel/Kconfig.locks | 2
|
||||
kernel/locking/rwlock-rt.c | 328 ++++++++++++++++++++++++++++++++++++++++
|
||||
4 files changed, 494 insertions(+), 1 deletion(-)
|
||||
create mode 100644 include/linux/rwlock_rt.h
|
||||
create mode 100644 include/linux/rwlock_types_rt.h
|
||||
create mode 100644 kernel/locking/rwlock-rt.c
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/rwlock_rt.h
|
||||
@@ -0,0 +1,119 @@
|
||||
@@ -0,0 +1,109 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef __LINUX_RWLOCK_RT_H
|
||||
+#define __LINUX_RWLOCK_RT_H
|
||||
+
|
||||
@@ -43,7 +46,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags)
|
||||
+{
|
||||
+ /* XXX ARCH_IRQ_ENABLED */
|
||||
+ *flags = 0;
|
||||
+ return rt_write_trylock(lock);
|
||||
+}
|
||||
@@ -126,20 +128,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ __rt_rwlock_init(rwl, #rwl, &__key); \
|
||||
+} while (0)
|
||||
+
|
||||
+/*
|
||||
+ * Internal functions made global for CPU pinning
|
||||
+ */
|
||||
+void __read_rt_lock(struct rt_rw_lock *lock);
|
||||
+int __read_rt_trylock(struct rt_rw_lock *lock);
|
||||
+void __write_rt_lock(struct rt_rw_lock *lock);
|
||||
+int __write_rt_trylock(struct rt_rw_lock *lock);
|
||||
+void __read_rt_unlock(struct rt_rw_lock *lock);
|
||||
+void __write_rt_unlock(struct rt_rw_lock *lock);
|
||||
+
|
||||
+#endif
|
||||
--- /dev/null
|
||||
+++ b/include/linux/rwlock_types_rt.h
|
||||
@@ -0,0 +1,55 @@
|
||||
@@ -0,0 +1,56 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#ifndef __LINUX_RWLOCK_TYPES_RT_H
|
||||
+#define __LINUX_RWLOCK_TYPES_RT_H
|
||||
+
|
||||
@@ -195,11 +188,21 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ } while (0)
|
||||
+
|
||||
+#endif
|
||||
--- a/kernel/Kconfig.locks
|
||||
+++ b/kernel/Kconfig.locks
|
||||
@@ -251,7 +251,7 @@ config ARCH_USE_QUEUED_RWLOCKS
|
||||
|
||||
config QUEUED_RWLOCKS
|
||||
def_bool y if ARCH_USE_QUEUED_RWLOCKS
|
||||
- depends on SMP
|
||||
+ depends on SMP && !PREEMPT_RT
|
||||
|
||||
config ARCH_HAS_MMIOWB
|
||||
bool
|
||||
--- /dev/null
|
||||
+++ b/kernel/locking/rwlock-rt.c
|
||||
@@ -0,0 +1,368 @@
|
||||
+/*
|
||||
+ */
|
||||
@@ -0,0 +1,328 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0-only
|
||||
+#include <linux/sched/debug.h>
|
||||
+#include <linux/export.h>
|
||||
+
|
||||
@@ -262,7 +265,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ lock->rtmutex.save_state = 1;
|
||||
+}
|
||||
+
|
||||
+int __read_rt_trylock(struct rt_rw_lock *lock)
|
||||
+static int __read_rt_trylock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ int r, old;
|
||||
+
|
||||
@@ -279,7 +282,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void __sched __read_rt_lock(struct rt_rw_lock *lock)
|
||||
+static void __read_rt_lock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ struct rt_mutex *m = &lock->rtmutex;
|
||||
+ struct rt_mutex_waiter waiter;
|
||||
@@ -342,7 +345,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ debug_rt_mutex_free_waiter(&waiter);
|
||||
+}
|
||||
+
|
||||
+void __read_rt_unlock(struct rt_rw_lock *lock)
|
||||
+static void __read_rt_unlock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ struct rt_mutex *m = &lock->rtmutex;
|
||||
+ struct task_struct *tsk;
|
||||
@@ -378,7 +381,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ rt_spin_lock_slowunlock(m);
|
||||
+}
|
||||
+
|
||||
+void __sched __write_rt_lock(struct rt_rw_lock *lock)
|
||||
+static void __write_rt_lock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ struct rt_mutex *m = &lock->rtmutex;
|
||||
+ struct task_struct *self = current;
|
||||
@@ -422,7 +425,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+int __write_rt_trylock(struct rt_rw_lock *lock)
|
||||
+static int __write_rt_trylock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ struct rt_mutex *m = &lock->rtmutex;
|
||||
+ unsigned long flags;
|
||||
@@ -442,7 +445,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+void __write_rt_unlock(struct rt_rw_lock *lock)
|
||||
+static void __write_rt_unlock(struct rt_rw_lock *lock)
|
||||
+{
|
||||
+ struct rt_mutex *m = &lock->rtmutex;
|
||||
+ unsigned long flags;
|
||||
@@ -451,43 +454,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ __write_unlock_common(lock, WRITER_BIAS, flags);
|
||||
+}
|
||||
+
|
||||
+/* Map the reader biased implementation */
|
||||
+static inline int do_read_rt_trylock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ return __read_rt_trylock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline int do_write_rt_trylock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ return __write_rt_trylock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline void do_read_rt_lock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ __read_rt_lock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline void do_write_rt_lock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ __write_rt_lock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline void do_read_rt_unlock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ __read_rt_unlock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline void do_write_rt_unlock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ __write_rt_unlock(rwlock);
|
||||
+}
|
||||
+
|
||||
+static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name,
|
||||
+ struct lock_class_key *key)
|
||||
+{
|
||||
+ __rwlock_biased_rt_init(rwlock, name, key);
|
||||
+}
|
||||
+
|
||||
+int __lockfunc rt_read_can_lock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ return atomic_read(&rwlock->readers) < 0;
|
||||
@@ -505,12 +471,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ migrate_disable();
|
||||
+ ret = do_read_rt_trylock(rwlock);
|
||||
+ if (ret)
|
||||
+ ret = __read_rt_trylock(rwlock);
|
||||
+ if (ret) {
|
||||
+ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_);
|
||||
+ else
|
||||
+ migrate_enable();
|
||||
+ migrate_disable();
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_read_trylock);
|
||||
@@ -519,50 +484,49 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+{
|
||||
+ int ret;
|
||||
+
|
||||
+ migrate_disable();
|
||||
+ ret = do_write_rt_trylock(rwlock);
|
||||
+ if (ret)
|
||||
+ ret = __write_rt_trylock(rwlock);
|
||||
+ if (ret) {
|
||||
+ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_);
|
||||
+ else
|
||||
+ migrate_enable();
|
||||
+ migrate_disable();
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_write_trylock);
|
||||
+
|
||||
+void __lockfunc rt_read_lock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ migrate_disable();
|
||||
+ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
|
||||
+ do_read_rt_lock(rwlock);
|
||||
+ __read_rt_lock(rwlock);
|
||||
+ migrate_disable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_read_lock);
|
||||
+
|
||||
+void __lockfunc rt_write_lock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ migrate_disable();
|
||||
+ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
|
||||
+ do_write_rt_lock(rwlock);
|
||||
+ __write_rt_lock(rwlock);
|
||||
+ migrate_disable();
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_write_lock);
|
||||
+
|
||||
+void __lockfunc rt_read_unlock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
|
||||
+ do_read_rt_unlock(rwlock);
|
||||
+ rwlock_release(&rwlock->dep_map, _RET_IP_);
|
||||
+ migrate_enable();
|
||||
+ __read_rt_unlock(rwlock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_read_unlock);
|
||||
+
|
||||
+void __lockfunc rt_write_unlock(rwlock_t *rwlock)
|
||||
+{
|
||||
+ rwlock_release(&rwlock->dep_map, 1, _RET_IP_);
|
||||
+ do_write_rt_unlock(rwlock);
|
||||
+ rwlock_release(&rwlock->dep_map, _RET_IP_);
|
||||
+ migrate_enable();
|
||||
+ __write_rt_unlock(rwlock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(rt_write_unlock);
|
||||
+
|
||||
+void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key)
|
||||
+{
|
||||
+ do_rwlock_rt_init(rwlock, name, key);
|
||||
+ __rwlock_biased_rt_init(rwlock, name, key);
|
||||
+}
|
||||
+EXPORT_SYMBOL(__rt_rwlock_init);
|
||||
@@ -1,30 +1,35 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:31:14 +0200
|
||||
Subject: rtmutex: wire up RT's locking
|
||||
Subject: [PATCH 20/22] locking/rtmutex: wire up RT's locking
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/mutex.h | 20 +++++++++++++-------
|
||||
include/linux/rwsem.h | 12 ++++++++++++
|
||||
include/linux/spinlock.h | 12 +++++++++++-
|
||||
include/linux/spinlock_api_smp.h | 4 +++-
|
||||
include/linux/spinlock_types.h | 11 ++++++++---
|
||||
kernel/locking/Makefile | 10 +++++++---
|
||||
kernel/locking/rwsem.c | 7 +++++++
|
||||
kernel/locking/spinlock.c | 7 +++++++
|
||||
kernel/locking/spinlock_debug.c | 5 +++++
|
||||
9 files changed, 73 insertions(+), 15 deletions(-)
|
||||
include/linux/mutex.h | 26 ++++++++++++++++----------
|
||||
include/linux/rwsem.h | 12 ++++++++++++
|
||||
include/linux/spinlock.h | 12 +++++++++++-
|
||||
include/linux/spinlock_api_smp.h | 4 +++-
|
||||
include/linux/spinlock_types.h | 11 ++++++++---
|
||||
include/linux/spinlock_types_up.h | 2 +-
|
||||
kernel/Kconfig.preempt | 1 +
|
||||
kernel/locking/Makefile | 10 +++++++---
|
||||
kernel/locking/rwsem.c | 6 ++++++
|
||||
kernel/locking/spinlock.c | 7 +++++++
|
||||
kernel/locking/spinlock_debug.c | 5 +++++
|
||||
11 files changed, 77 insertions(+), 19 deletions(-)
|
||||
|
||||
--- a/include/linux/mutex.h
|
||||
+++ b/include/linux/mutex.h
|
||||
@@ -22,6 +22,17 @@
|
||||
@@ -22,6 +22,20 @@
|
||||
|
||||
struct ww_acquire_ctx;
|
||||
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
|
||||
+ , .dep_map = { .name = #lockname }
|
||||
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
|
||||
+ , .dep_map = { \
|
||||
+ .name = #lockname, \
|
||||
+ .wait_type_inner = LD_WAIT_SLEEP, \
|
||||
+ }
|
||||
+#else
|
||||
+# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
|
||||
+#endif
|
||||
@@ -36,13 +41,16 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Simple, straightforward mutexes with strict semantics:
|
||||
*
|
||||
@@ -108,13 +119,6 @@ do { \
|
||||
@@ -119,16 +133,6 @@ do { \
|
||||
__mutex_init((mutex), #mutex, &__key); \
|
||||
} while (0)
|
||||
|
||||
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
|
||||
- , .dep_map = { .name = #lockname }
|
||||
-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \
|
||||
- , .dep_map = { \
|
||||
- .name = #lockname, \
|
||||
- .wait_type_inner = LD_WAIT_SLEEP, \
|
||||
- }
|
||||
-#else
|
||||
-# define __DEP_MAP_MUTEX_INITIALIZER(lockname)
|
||||
-#endif
|
||||
@@ -50,7 +58,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#define __MUTEX_INITIALIZER(lockname) \
|
||||
{ .owner = ATOMIC_LONG_INIT(0) \
|
||||
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
|
||||
@@ -210,4 +214,6 @@ enum mutex_trylock_recursive_enum {
|
||||
@@ -224,4 +228,6 @@ enum mutex_trylock_recursive_enum {
|
||||
extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
|
||||
mutex_trylock_recursive(struct mutex *lock);
|
||||
|
||||
@@ -71,7 +79,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
#include <linux/osq_lock.h>
|
||||
#endif
|
||||
@@ -115,6 +120,13 @@ static inline int rwsem_is_contended(str
|
||||
@@ -119,6 +124,13 @@ static inline int rwsem_is_contended(str
|
||||
return !list_empty(&sem->wait_list);
|
||||
}
|
||||
|
||||
@@ -87,7 +95,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
*/
|
||||
--- a/include/linux/spinlock.h
|
||||
+++ b/include/linux/spinlock.h
|
||||
@@ -307,7 +307,11 @@ static inline void do_raw_spin_unlock(ra
|
||||
@@ -309,7 +309,11 @@ static inline void do_raw_spin_unlock(ra
|
||||
})
|
||||
|
||||
/* Include rwlock functions */
|
||||
@@ -100,7 +108,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* Pull the _spin_*()/_read_*()/_write_*() functions/declarations:
|
||||
@@ -318,6 +322,10 @@ static inline void do_raw_spin_unlock(ra
|
||||
@@ -320,6 +324,10 @@ static inline void do_raw_spin_unlock(ra
|
||||
# include <linux/spinlock_api_up.h>
|
||||
#endif
|
||||
|
||||
@@ -111,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* Map the spin_lock functions to the raw variants for PREEMPT_RT=n
|
||||
*/
|
||||
@@ -438,6 +446,8 @@ static __always_inline int spin_is_conte
|
||||
@@ -454,6 +462,8 @@ static __always_inline int spin_is_conte
|
||||
|
||||
#define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock)
|
||||
|
||||
@@ -151,6 +159,27 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
|
||||
#endif /* __LINUX_SPINLOCK_TYPES_H */
|
||||
--- a/include/linux/spinlock_types_up.h
|
||||
+++ b/include/linux/spinlock_types_up.h
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef __LINUX_SPINLOCK_TYPES_UP_H
|
||||
#define __LINUX_SPINLOCK_TYPES_UP_H
|
||||
|
||||
-#ifndef __LINUX_SPINLOCK_TYPES_H
|
||||
+#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__LINUX_RT_MUTEX_H)
|
||||
# error "please don't include this file directly"
|
||||
#endif
|
||||
|
||||
--- a/kernel/Kconfig.preempt
|
||||
+++ b/kernel/Kconfig.preempt
|
||||
@@ -59,6 +59,7 @@ config PREEMPT_RT
|
||||
bool "Fully Preemptible Kernel (Real-Time)"
|
||||
depends on EXPERT && ARCH_SUPPORTS_RT
|
||||
select PREEMPTION
|
||||
+ select RT_MUTEXES
|
||||
help
|
||||
This option turns the kernel into a real-time kernel by replacing
|
||||
various locking primitives (spinlocks, rwlocks, etc.) with
|
||||
--- a/kernel/locking/Makefile
|
||||
+++ b/kernel/locking/Makefile
|
||||
@@ -3,7 +3,7 @@
|
||||
@@ -160,9 +189,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
-obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
|
||||
+obj-y += semaphore.o rwsem.o percpu-rwsem.o
|
||||
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
|
||||
@@ -12,19 +12,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS
|
||||
# Avoid recursion lockdep -> KCSAN -> ... -> lockdep.
|
||||
KCSAN_SANITIZE_lockdep.o := n
|
||||
@@ -15,19 +15,23 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS
|
||||
CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE)
|
||||
endif
|
||||
|
||||
@@ -198,15 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
#include "lock_events.h"
|
||||
|
||||
/*
|
||||
@@ -1332,6 +1333,7 @@ static struct rw_semaphore *rwsem_downgr
|
||||
return sem;
|
||||
}
|
||||
|
||||
+
|
||||
/*
|
||||
* lock for reading
|
||||
*/
|
||||
@@ -1482,6 +1484,7 @@ static inline void __downgrade_write(str
|
||||
@@ -1343,6 +1344,7 @@ static inline void __downgrade_write(str
|
||||
if (tmp & RWSEM_FLAG_WAITERS)
|
||||
rwsem_downgrade_wake(sem);
|
||||
}
|
||||
@@ -214,36 +235,26 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
/*
|
||||
* lock for reading
|
||||
@@ -1613,6 +1616,7 @@ void _down_write_nest_lock(struct rw_sem
|
||||
}
|
||||
EXPORT_SYMBOL(_down_write_nest_lock);
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
void down_read_non_owner(struct rw_semaphore *sem)
|
||||
@@ -1506,7 +1508,9 @@ void down_read_non_owner(struct rw_semap
|
||||
{
|
||||
might_sleep();
|
||||
@@ -1620,6 +1624,7 @@ void down_read_non_owner(struct rw_semap
|
||||
__down_read(sem);
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
__rwsem_set_reader_owned(sem, NULL);
|
||||
+#endif
|
||||
}
|
||||
EXPORT_SYMBOL(down_read_non_owner);
|
||||
+#endif
|
||||
|
||||
void down_write_nested(struct rw_semaphore *sem, int subclass)
|
||||
{
|
||||
@@ -1644,11 +1649,13 @@ int __sched down_write_killable_nested(s
|
||||
}
|
||||
EXPORT_SYMBOL(down_write_killable_nested);
|
||||
@@ -1535,7 +1539,9 @@ EXPORT_SYMBOL(down_write_killable_nested
|
||||
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
void up_read_non_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
|
||||
+#endif
|
||||
__up_read(sem);
|
||||
}
|
||||
EXPORT_SYMBOL(up_read_non_owner);
|
||||
+#endif
|
||||
|
||||
#endif
|
||||
--- a/kernel/locking/spinlock.c
|
||||
+++ b/kernel/locking/spinlock.c
|
||||
@@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(loc
|
||||
@@ -1,14 +1,50 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 12 Oct 2017 17:34:38 +0200
|
||||
Subject: rtmutex: add ww_mutex addon for mutex-rt
|
||||
Subject: [PATCH 21/22] locking/rtmutex: add ww_mutex addon for mutex-rt
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/locking/rtmutex.c | 271 ++++++++++++++++++++++++++++++++++++++--
|
||||
include/linux/mutex.h | 8 -
|
||||
include/linux/ww_mutex.h | 8 +
|
||||
kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++--
|
||||
kernel/locking/rtmutex_common.h | 2
|
||||
kernel/locking/rwsem-rt.c | 2
|
||||
3 files changed, 261 insertions(+), 14 deletions(-)
|
||||
5 files changed, 262 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/include/linux/mutex.h
|
||||
+++ b/include/linux/mutex.h
|
||||
@@ -82,14 +82,6 @@ struct mutex {
|
||||
struct ww_class;
|
||||
struct ww_acquire_ctx;
|
||||
|
||||
-struct ww_mutex {
|
||||
- struct mutex base;
|
||||
- struct ww_acquire_ctx *ctx;
|
||||
-#ifdef CONFIG_DEBUG_MUTEXES
|
||||
- struct ww_class *ww_class;
|
||||
-#endif
|
||||
-};
|
||||
-
|
||||
/*
|
||||
* This is the control structure for tasks blocked on mutex,
|
||||
* which resides on the blocked task's kernel stack:
|
||||
--- a/include/linux/ww_mutex.h
|
||||
+++ b/include/linux/ww_mutex.h
|
||||
@@ -28,6 +28,14 @@ struct ww_class {
|
||||
unsigned int is_wait_die;
|
||||
};
|
||||
|
||||
+struct ww_mutex {
|
||||
+ struct mutex base;
|
||||
+ struct ww_acquire_ctx *ctx;
|
||||
+#ifdef CONFIG_DEBUG_MUTEXES
|
||||
+ struct ww_class *ww_class;
|
||||
+#endif
|
||||
+};
|
||||
+
|
||||
struct ww_acquire_ctx {
|
||||
struct task_struct *task;
|
||||
unsigned long stamp;
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -24,6 +24,7 @@
|
||||
@@ -19,7 +55,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
#include "rtmutex_common.h"
|
||||
|
||||
@@ -1244,6 +1245,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
|
||||
@@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
|
||||
|
||||
#endif /* PREEMPT_RT */
|
||||
|
||||
@@ -60,7 +96,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
static inline int
|
||||
try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
|
||||
struct rt_mutex_waiter *waiter)
|
||||
@@ -1522,7 +1557,8 @@ void rt_mutex_init_waiter(struct rt_mute
|
||||
@@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mute
|
||||
static int __sched
|
||||
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
@@ -70,7 +106,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@@ -1540,6 +1576,12 @@ static int __sched
|
||||
@@ -1530,6 +1566,12 @@ static int __sched
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -82,8 +118,8 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
raw_spin_unlock_irq(&lock->wait_lock);
|
||||
|
||||
debug_rt_mutex_print_deadlock(waiter);
|
||||
@@ -1574,16 +1616,106 @@ static void rt_mutex_handle_deadlock(int
|
||||
schedule();
|
||||
@@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,7 +227,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
set_current_state(state);
|
||||
|
||||
@@ -1593,14 +1725,24 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
@@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
|
||||
|
||||
@@ -219,7 +255,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1617,7 +1759,8 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
@@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(str
|
||||
static int __sched
|
||||
rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
@@ -229,7 +265,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
{
|
||||
struct rt_mutex_waiter waiter;
|
||||
unsigned long flags;
|
||||
@@ -1635,7 +1778,8 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
@@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
||||
*/
|
||||
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
||||
|
||||
@@ -239,7 +275,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
||||
|
||||
@@ -1765,29 +1909,33 @@ static bool __sched rt_mutex_slowunlock(
|
||||
@@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock(
|
||||
*/
|
||||
static inline int
|
||||
rt_mutex_fastlock(struct rt_mutex *lock, int state,
|
||||
@@ -258,26 +294,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
static inline int
|
||||
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
enum rtmutex_chainwalk chwalk,
|
||||
+ struct ww_acquire_ctx *ww_ctx,
|
||||
int (*slowfn)(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
- enum rtmutex_chainwalk chwalk))
|
||||
+ enum rtmutex_chainwalk chwalk,
|
||||
+ struct ww_acquire_ctx *ww_ctx))
|
||||
{
|
||||
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
|
||||
likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
|
||||
return 0;
|
||||
|
||||
- return slowfn(lock, state, timeout, chwalk);
|
||||
+ return slowfn(lock, state, timeout, chwalk, ww_ctx);
|
||||
}
|
||||
|
||||
static inline int
|
||||
@@ -1832,7 +1980,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc
|
||||
@@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc
|
||||
int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
|
||||
{
|
||||
might_sleep();
|
||||
@@ -286,15 +303,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1952,6 +2100,7 @@ rt_mutex_timed_lock(struct rt_mutex *loc
|
||||
mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
|
||||
ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
|
||||
RT_MUTEX_MIN_CHAINWALK,
|
||||
+ NULL,
|
||||
rt_mutex_slowlock);
|
||||
if (ret)
|
||||
mutex_release(&lock->dep_map, 1, _RET_IP_);
|
||||
@@ -2321,7 +2470,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
||||
@@ -2245,7 +2391,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
||||
raw_spin_lock_irq(&lock->wait_lock);
|
||||
/* sleep on the mutex */
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
@@ -303,7 +312,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
/*
|
||||
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
|
||||
* have to fix that up.
|
||||
@@ -2391,3 +2540,99 @@ bool rt_mutex_cleanup_proxy_lock(struct
|
||||
@@ -2315,3 +2461,97 @@ bool rt_mutex_cleanup_proxy_lock(struct
|
||||
|
||||
return cleanup;
|
||||
}
|
||||
@@ -312,7 +321,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
|
||||
+{
|
||||
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
|
||||
+ unsigned tmp;
|
||||
+ unsigned int tmp;
|
||||
+
|
||||
+ if (ctx->deadlock_inject_countdown-- == 0) {
|
||||
+ tmp = ctx->deadlock_inject_interval;
|
||||
@@ -347,7 +356,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0,
|
||||
+ ctx);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->base.dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
||||
+ else if (!ret && ctx && ctx->acquired > 1)
|
||||
+ return ww_mutex_deadlock_injection(lock, ctx);
|
||||
+
|
||||
@@ -367,7 +376,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0,
|
||||
+ ctx);
|
||||
+ if (ret)
|
||||
+ mutex_release(&lock->base.dep_map, 1, _RET_IP_);
|
||||
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
||||
+ else if (!ret && ctx && ctx->acquired > 1)
|
||||
+ return ww_mutex_deadlock_injection(lock, ctx);
|
||||
+
|
||||
@@ -377,13 +386,11 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+
|
||||
+void __sched ww_mutex_unlock(struct ww_mutex *lock)
|
||||
+{
|
||||
+ int nest = !!lock->ctx;
|
||||
+
|
||||
+ /*
|
||||
+ * The unlocking fastpath is the 0->1 transition from 'locked'
|
||||
+ * into 'unlocked' state:
|
||||
+ */
|
||||
+ if (nest) {
|
||||
+ if (lock->ctx) {
|
||||
+#ifdef CONFIG_DEBUG_MUTEXES
|
||||
+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
|
||||
+#endif
|
||||
@@ -392,7 +399,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+ lock->ctx = NULL;
|
||||
+ }
|
||||
+
|
||||
+ mutex_release(&lock->base.dep_map, nest, _RET_IP_);
|
||||
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
||||
+ __rt_mutex_unlock(&lock->base.lock);
|
||||
+}
|
||||
+EXPORT_SYMBOL(ww_mutex_unlock);
|
||||
@@ -405,7 +412,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
+#endif
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -165,6 +165,7 @@ extern void rt_mutex_postunlock(struct w
|
||||
@@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct w
|
||||
struct wake_q_head *wake_sleeper_q);
|
||||
|
||||
/* RW semaphore special interface */
|
||||
@@ -413,7 +420,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
|
||||
extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
|
||||
extern int __rt_mutex_trylock(struct rt_mutex *lock);
|
||||
@@ -172,6 +173,7 @@ extern void __rt_mutex_unlock(struct rt_
|
||||
@@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_
|
||||
int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
|
||||
struct hrtimer_sleeper *timeout,
|
||||
enum rtmutex_chainwalk chwalk,
|
||||
@@ -423,7 +430,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
struct rt_mutex_waiter *waiter,
|
||||
--- a/kernel/locking/rwsem-rt.c
|
||||
+++ b/kernel/locking/rwsem-rt.c
|
||||
@@ -131,7 +131,7 @@ static int __sched __down_read_common(st
|
||||
@@ -138,7 +138,7 @@ static int __sched __down_read_common(st
|
||||
*/
|
||||
rt_mutex_init_waiter(&waiter, false);
|
||||
ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK,
|
||||
@@ -0,0 +1,224 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 6 Oct 2020 13:07:17 +0200
|
||||
Subject: [PATCH 22/22] locking/rtmutex: Use custom scheduling function for
|
||||
spin-schedule()
|
||||
|
||||
PREEMPT_RT builds the rwsem, mutex, spinlock and rwlock typed locks on
|
||||
top of an rtmutex lock. While blocked, task->pi_blocked_on is set
|
||||
(tsk_is_pi_blocked()) and the task needs to schedule away while waiting.
|
||||
|
||||
The schedule process must distinguish between blocking on a regular
|
||||
sleeping lock (rwsem and mutex) and a RT-only sleeping lock (spinlock
|
||||
and rwlock):
|
||||
- rwsem and mutex must flush block requests (blk_schedule_flush_plug())
|
||||
even if blocked on a lock. This cannot deadlock because this also
|
||||
happens for non-RT.
|
||||
There should be a warning if the scheduling point is within an RCU read
|
||||
section.
|
||||
|
||||
- spinlock and rwlock must not flush block requests. This will deadlock
|
||||
if the callback attempts to acquire a lock which is already acquired.
|
||||
Similarly to being preempted, there should be no warning if the
|
||||
scheduling point is within an RCU read section.
|
||||
|
||||
Add preempt_schedule_lock() which is invoked if scheduling is required
|
||||
while blocking on a PREEMPT_RT-only sleeping lock.
|
||||
Remove tsk_is_pi_blocked() from the scheduler path which is no longer
|
||||
needed with the additional scheduler entry point.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
arch/arm64/include/asm/preempt.h | 3 +++
|
||||
arch/x86/include/asm/preempt.h | 3 +++
|
||||
include/asm-generic/preempt.h | 3 +++
|
||||
include/linux/sched/rt.h | 8 --------
|
||||
kernel/locking/rtmutex.c | 2 +-
|
||||
kernel/locking/rwlock-rt.c | 2 +-
|
||||
kernel/sched/core.c | 32 +++++++++++++++++++++-----------
|
||||
7 files changed, 32 insertions(+), 21 deletions(-)
|
||||
|
||||
--- a/arch/arm64/include/asm/preempt.h
|
||||
+++ b/arch/arm64/include/asm/preempt.h
|
||||
@@ -81,6 +81,9 @@ static inline bool should_resched(int pr
|
||||
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
void preempt_schedule(void);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+void preempt_schedule_lock(void);
|
||||
+#endif
|
||||
#define __preempt_schedule() preempt_schedule()
|
||||
void preempt_schedule_notrace(void);
|
||||
#define __preempt_schedule_notrace() preempt_schedule_notrace()
|
||||
--- a/arch/x86/include/asm/preempt.h
|
||||
+++ b/arch/x86/include/asm/preempt.h
|
||||
@@ -103,6 +103,9 @@ static __always_inline bool should_resch
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ extern void preempt_schedule_lock(void);
|
||||
+#endif
|
||||
extern asmlinkage void preempt_schedule_thunk(void);
|
||||
# define __preempt_schedule() \
|
||||
asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT)
|
||||
--- a/include/asm-generic/preempt.h
|
||||
+++ b/include/asm-generic/preempt.h
|
||||
@@ -79,6 +79,9 @@ static __always_inline bool should_resch
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+extern void preempt_schedule_lock(void);
|
||||
+#endif
|
||||
extern asmlinkage void preempt_schedule(void);
|
||||
#define __preempt_schedule() preempt_schedule()
|
||||
extern asmlinkage void preempt_schedule_notrace(void);
|
||||
--- a/include/linux/sched/rt.h
|
||||
+++ b/include/linux/sched/rt.h
|
||||
@@ -39,20 +39,12 @@ static inline struct task_struct *rt_mut
|
||||
}
|
||||
extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task);
|
||||
extern void rt_mutex_adjust_pi(struct task_struct *p);
|
||||
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
|
||||
-{
|
||||
- return tsk->pi_blocked_on != NULL;
|
||||
-}
|
||||
#else
|
||||
static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
# define rt_mutex_adjust_pi(p) do { } while (0)
|
||||
-static inline bool tsk_is_pi_blocked(struct task_struct *tsk)
|
||||
-{
|
||||
- return false;
|
||||
-}
|
||||
#endif
|
||||
|
||||
extern void normalize_rt_tasks(void);
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -1067,7 +1067,7 @@ void __sched rt_spin_lock_slowlock_locke
|
||||
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
||||
|
||||
if (top_waiter != waiter || adaptive_wait(lock, lock_owner))
|
||||
- schedule();
|
||||
+ preempt_schedule_lock();
|
||||
|
||||
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
||||
|
||||
--- a/kernel/locking/rwlock-rt.c
|
||||
+++ b/kernel/locking/rwlock-rt.c
|
||||
@@ -211,7 +211,7 @@ static void __write_rt_lock(struct rt_rw
|
||||
raw_spin_unlock_irqrestore(&m->wait_lock, flags);
|
||||
|
||||
if (atomic_read(&lock->readers) != 0)
|
||||
- schedule();
|
||||
+ preempt_schedule_lock();
|
||||
|
||||
raw_spin_lock_irqsave(&m->wait_lock, flags);
|
||||
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -5006,7 +5006,7 @@ pick_next_task(struct rq *rq, struct tas
|
||||
*
|
||||
* WARNING: must be called with preemption disabled!
|
||||
*/
|
||||
-static void __sched notrace __schedule(bool preempt)
|
||||
+static void __sched notrace __schedule(bool preempt, bool spinning_lock)
|
||||
{
|
||||
struct task_struct *prev, *next;
|
||||
unsigned long *switch_count;
|
||||
@@ -5059,7 +5059,7 @@ static void __sched notrace __schedule(b
|
||||
* - ptrace_{,un}freeze_traced() can change ->state underneath us.
|
||||
*/
|
||||
prev_state = prev->state;
|
||||
- if (!preempt && prev_state) {
|
||||
+ if ((!preempt || spinning_lock) && prev_state) {
|
||||
if (signal_pending_state(prev_state, prev)) {
|
||||
prev->state = TASK_RUNNING;
|
||||
} else {
|
||||
@@ -5143,7 +5143,7 @@ void __noreturn do_task_dead(void)
|
||||
/* Tell freezer to ignore us: */
|
||||
current->flags |= PF_NOFREEZE;
|
||||
|
||||
- __schedule(false);
|
||||
+ __schedule(false, false);
|
||||
BUG();
|
||||
|
||||
/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
|
||||
@@ -5176,9 +5176,6 @@ static inline void sched_submit_work(str
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
|
||||
- if (tsk_is_pi_blocked(tsk))
|
||||
- return;
|
||||
-
|
||||
/*
|
||||
* If we are going to sleep and we have plugged IO queued,
|
||||
* make sure to submit it to avoid deadlocks.
|
||||
@@ -5204,7 +5201,7 @@ asmlinkage __visible void __sched schedu
|
||||
sched_submit_work(tsk);
|
||||
do {
|
||||
preempt_disable();
|
||||
- __schedule(false);
|
||||
+ __schedule(false, false);
|
||||
sched_preempt_enable_no_resched();
|
||||
} while (need_resched());
|
||||
sched_update_worker(tsk);
|
||||
@@ -5232,7 +5229,7 @@ void __sched schedule_idle(void)
|
||||
*/
|
||||
WARN_ON_ONCE(current->state);
|
||||
do {
|
||||
- __schedule(false);
|
||||
+ __schedule(false, false);
|
||||
} while (need_resched());
|
||||
}
|
||||
|
||||
@@ -5285,7 +5282,7 @@ static void __sched notrace preempt_sche
|
||||
*/
|
||||
preempt_disable_notrace();
|
||||
preempt_latency_start(1);
|
||||
- __schedule(true);
|
||||
+ __schedule(true, false);
|
||||
preempt_latency_stop(1);
|
||||
preempt_enable_no_resched_notrace();
|
||||
|
||||
@@ -5315,6 +5312,19 @@ asmlinkage __visible void __sched notrac
|
||||
NOKPROBE_SYMBOL(preempt_schedule);
|
||||
EXPORT_SYMBOL(preempt_schedule);
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+void __sched notrace preempt_schedule_lock(void)
|
||||
+{
|
||||
+ do {
|
||||
+ preempt_disable();
|
||||
+ __schedule(true, true);
|
||||
+ sched_preempt_enable_no_resched();
|
||||
+ } while (need_resched());
|
||||
+}
|
||||
+NOKPROBE_SYMBOL(preempt_schedule_lock);
|
||||
+EXPORT_SYMBOL(preempt_schedule_lock);
|
||||
+#endif
|
||||
+
|
||||
/**
|
||||
* preempt_schedule_notrace - preempt_schedule called by tracing
|
||||
*
|
||||
@@ -5358,7 +5368,7 @@ asmlinkage __visible void __sched notrac
|
||||
* an infinite recursion.
|
||||
*/
|
||||
prev_ctx = exception_enter();
|
||||
- __schedule(true);
|
||||
+ __schedule(true, false);
|
||||
exception_exit(prev_ctx);
|
||||
|
||||
preempt_latency_stop(1);
|
||||
@@ -5387,7 +5397,7 @@ asmlinkage __visible void __sched preemp
|
||||
do {
|
||||
preempt_disable();
|
||||
local_irq_enable();
|
||||
- __schedule(true);
|
||||
+ __schedule(true, false);
|
||||
local_irq_disable();
|
||||
sched_preempt_enable_no_resched();
|
||||
} while (need_resched());
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user