linuxkit (https://github.com/linuxkit/linuxkit)
Merge pull request #3491 from TiejunChina/master-dev: enable rt for 5.4.x (commit f98fa5ca41)

This merge enables PREEMPT_RT builds for the 5.4.x kernel series: the kernel Makefile gains a 5.4.28 -rt build variant for x86_64 and aarch64, two new RT config fragments are added, and the RT patch files shown below are imported.

@@ -253,6 +253,7 @@ endef
#
ifeq ($(ARCH),x86_64)
$(eval $(call kernel,5.4.28,5.4.x,$(EXTRA),$(DEBUG)))
+$(eval $(call kernel,5.4.28,5.4.x,-rt,))
$(eval $(call kernel,4.19.113,4.19.x,$(EXTRA),$(DEBUG)))
$(eval $(call kernel,4.19.113,4.19.x,,-dbg))
$(eval $(call kernel,4.19.106,4.19.x,-rt,))
@@ -260,6 +261,7 @@ $(eval $(call kernel,4.14.174,4.14.x,$(EXTRA),$(DEBUG)))

else ifeq ($(ARCH),aarch64)
$(eval $(call kernel,5.4.28,5.4.x,$(EXTRA),$(DEBUG)))
+$(eval $(call kernel,5.4.28,5.4.x,-rt,))
$(eval $(call kernel,4.19.106,4.19.x,-rt,))

else ifeq ($(ARCH),s390x)

kernel/config-5.4.x-aarch64-rt (new file, 20 lines)
@@ -0,0 +1,20 @@
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set
CONFIG_HZ_1000=y
CONFIG_HZ=1000

kernel/config-5.4.x-x86_64-rt (new file, 22 lines)
@@ -0,0 +1,22 @@
CONFIG_RWSEM_GENERIC_SPINLOCK=y
# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
CONFIG_PREEMPT_RCU=y
CONFIG_TASKS_RCU=y
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_MEMCG_SYSFS_ON is not set
CONFIG_SLUB=y
# CONFIG_SLAB_FREELIST_HARDENED is not set
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_PREEMPT=y
CONFIG_PREEMPT_RT_BASE=y
CONFIG_HAVE_PREEMPT_LAZY=y
CONFIG_PREEMPT_LAZY=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT__LL is not set
# CONFIG_PREEMPT_RTB is not set
CONFIG_PREEMPT_RT_FULL=y
CONFIG_PREEMPT_COUNT=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_SLUB_STATS is not set
CONFIG_DEBUG_PREEMPT=y
# CONFIG_PREEMPT_TRACER is not set

@ -0,0 +1,35 @@
|
||||
From: Waiman Long <longman@redhat.com>
|
||||
Date: Thu, 3 Oct 2019 16:36:08 -0400
|
||||
Subject: [PATCH] lib/smp_processor_id: Don't use cpumask_equal()
|
||||
|
||||
The check_preemption_disabled() function uses cpumask_equal() to see
|
||||
if the task is bounded to the current CPU only. cpumask_equal() calls
|
||||
memcmp() to do the comparison. As x86 doesn't have __HAVE_ARCH_MEMCMP,
|
||||
the slow memcmp() function in lib/string.c is used.
|
||||
|
||||
On a RT kernel that call check_preemption_disabled() very frequently,
|
||||
below is the perf-record output of a certain microbenchmark:
|
||||
|
||||
42.75% 2.45% testpmd [kernel.kallsyms] [k] check_preemption_disabled
|
||||
40.01% 39.97% testpmd [kernel.kallsyms] [k] memcmp
|
||||
|
||||
We should avoid calling memcmp() in performance critical path. So the
|
||||
cpumask_equal() call is now replaced with an equivalent simpler check.
|
||||
|
||||
Signed-off-by: Waiman Long <longman@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
lib/smp_processor_id.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/lib/smp_processor_id.c
|
||||
+++ b/lib/smp_processor_id.c
|
||||
@@ -23,7 +23,7 @@ unsigned int check_preemption_disabled(c
|
||||
* Kernel threads bound to a single CPU can safely use
|
||||
* smp_processor_id():
|
||||
*/
|
||||
- if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu)))
|
||||
+ if (current->nr_cpus_allowed == 1)
|
||||
goto out;
|
||||
|
||||
/*
|
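The one-line change above replaces a full cpumask comparison with a cached counter check. A rough userspace illustration of the same idea, assuming only glibc's CPU_* affinity macros (this is not the kernel code):

/* Illustrative userspace sketch, not kernel code: a task pinned to exactly
 * one CPU can be detected either by comparing its whole affinity mask with
 * the mask of the CPU it runs on (the memcmp-style check the patch removes)
 * or by simply counting the allowed CPUs (what nr_cpus_allowed caches). */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t mask, self;

    if (sched_getaffinity(0, sizeof(mask), &mask) != 0) {
        perror("sched_getaffinity");
        return 1;
    }

    /* Old-style check: compare the whole mask against "only the current CPU". */
    CPU_ZERO(&self);
    CPU_SET(sched_getcpu(), &self);
    int pinned_by_compare = CPU_EQUAL(&mask, &self);

    /* New-style check: just count how many CPUs are allowed. */
    int pinned_by_count = (CPU_COUNT(&mask) == 1);

    printf("pinned (mask compare): %d, pinned (count): %d\n",
           pinned_by_compare, pinned_by_count);
    return 0;
}

If the task is pinned to a single CPU and currently running, both checks agree, which is exactly the equivalence the patch relies on.
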
@@ -0,0 +1,57 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 9 Aug 2019 14:42:27 +0200
Subject: [PATCH 1/7] jbd2: Simplify journal_unmap_buffer()

journal_unmap_buffer() checks first whether the buffer head is a journal.
If so it takes locks and then invokes jbd2_journal_grab_journal_head()
followed by another check whether this is journal head buffer.

The double checking is pointless.

Replace the initial check with jbd2_journal_grab_journal_head() which
alredy checks whether the buffer head is actually a journal.

Allows also early access to the journal head pointer for the upcoming
conversion of state lock to a regular spinlock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: linux-ext4@vger.kernel.org
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/transaction.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)

--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2203,7 +2203,8 @@ static int journal_unmap_buffer(journal_
* holding the page lock. --sct
*/

- if (!buffer_jbd(bh))
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh)
goto zap_buffer_unlocked;

/* OK, we have data buffer in journaled mode */
@@ -2211,10 +2212,6 @@ static int journal_unmap_buffer(journal_
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);

- jh = jbd2_journal_grab_journal_head(bh);
- if (!jh)
- goto zap_buffer_no_jh;
-
/*
* We cannot remove the buffer from checkpoint lists until the
* transaction adding inode to orphan list (let's call it T)
@@ -2338,7 +2335,6 @@ static int journal_unmap_buffer(journal_
*/
jh->b_modified = 0;
jbd2_journal_put_journal_head(jh);
-zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);

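The simplification in this patch boils down to using a single grab-and-validate call instead of a flag test followed by a separate reference grab. A minimal sketch of that pattern with C11 atomics, using made-up names rather than jbd2 APIs:

/* Illustrative sketch, not kernel code: a "grab" that only succeeds while
 * the object is live replaces the check-then-grab sequence the patch removes. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
    atomic_int refcount;          /* 0 means "not attached / being torn down" */
};

/* Analogous in spirit to jbd2_journal_grab_journal_head(): failure replaces
 * the separate buffer_jbd()-style flag test. */
static bool obj_try_get(struct obj *o)
{
    int c = atomic_load(&o->refcount);

    while (c != 0) {
        if (atomic_compare_exchange_weak(&o->refcount, &c, c + 1))
            return true;          /* validated and referenced in one step */
    }
    return false;                 /* object gone; caller bails out early */
}

int main(void)
{
    struct obj live = { .refcount = 1 };
    struct obj dead = { .refcount = 0 };

    printf("live: %d, dead: %d\n", obj_try_get(&live), obj_try_get(&dead));
    return 0;
}

When the try-get fails, the caller bails out immediately, which is what journal_unmap_buffer() now does when jbd2_journal_grab_journal_head() returns NULL.
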
@@ -0,0 +1,30 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 9 Aug 2019 14:42:28 +0200
Subject: [PATCH 2/7] jbd2: Remove jbd_trylock_bh_state()

No users.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: linux-ext4@vger.kernel.org
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/jbd2.h | 5 -----
1 file changed, 5 deletions(-)

--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -347,11 +347,6 @@ static inline void jbd_lock_bh_state(str
bit_spin_lock(BH_State, &bh->b_state);
}

-static inline int jbd_trylock_bh_state(struct buffer_head *bh)
-{
- return bit_spin_trylock(BH_State, &bh->b_state);
-}
-
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
return bit_spin_is_locked(BH_State, &bh->b_state);

@@ -0,0 +1,150 @@
From: Jan Kara <jack@suse.cz>
Date: Fri, 9 Aug 2019 14:42:29 +0200
Subject: [PATCH 3/7] jbd2: Move dropping of jh reference out of un/re-filing
 functions

__jbd2_journal_unfile_buffer() and __jbd2_journal_refile_buffer() drop
transaction's jh reference when they remove jh from a transaction. This
will be however inconvenient once we move state lock into journal_head
itself as we still need to unlock it and we'd need to grab jh reference
just for that. Move dropping of jh reference out of these functions into
the few callers.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/commit.c | 5 ++++-
fs/jbd2/transaction.c | 23 +++++++++++++++--------
include/linux/jbd2.h | 2 +-
3 files changed, 20 insertions(+), 10 deletions(-)

--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -920,6 +920,7 @@ void jbd2_journal_commit_transaction(jou
transaction_t *cp_transaction;
struct buffer_head *bh;
int try_to_free = 0;
+ bool drop_ref;

jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
@@ -1028,8 +1029,10 @@ void jbd2_journal_commit_transaction(jou
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile buffer");
- __jbd2_journal_refile_buffer(jh);
+ drop_ref = __jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
+ if (drop_ref)
+ jbd2_journal_put_journal_head(jh);
if (try_to_free)
release_buffer_page(bh); /* Drops bh reference */
else
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1602,6 +1602,7 @@ int jbd2_journal_forget (handle_t *handl
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__jbd2_journal_unfile_buffer(jh);
+ jbd2_journal_put_journal_head(jh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
goto not_jbd;
@@ -1975,17 +1976,15 @@ static void __jbd2_journal_temp_unlink_b
}

/*
- * Remove buffer from all transactions.
+ * Remove buffer from all transactions. The caller is responsible for dropping
+ * the jh reference that belonged to the transaction.
*
* Called with bh_state lock and j_list_lock
- *
- * jh and bh may be already freed when this function returns.
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
- jbd2_journal_put_journal_head(jh);
}

void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
@@ -1999,6 +1998,7 @@ void jbd2_journal_unfile_buffer(journal_
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
+ jbd2_journal_put_journal_head(jh);
__brelse(bh);
}

@@ -2137,6 +2137,7 @@ static int __dispose_buffer(struct journ
} else {
JBUFFER_TRACE(jh, "on running transaction");
__jbd2_journal_unfile_buffer(jh);
+ jbd2_journal_put_journal_head(jh);
}
return may_free;
}
@@ -2502,9 +2503,11 @@ void jbd2_journal_file_buffer(struct jou
* Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
*
- * jh and bh may be already free when this function returns
+ * When this function returns true, there's no next transaction to refile to
+ * and the caller has to drop jh reference through
+ * jbd2_journal_put_journal_head().
*/
-void __jbd2_journal_refile_buffer(struct journal_head *jh)
+bool __jbd2_journal_refile_buffer(struct journal_head *jh)
{
int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);
@@ -2516,7 +2519,7 @@ void __jbd2_journal_refile_buffer(struct
/* If the buffer is now unused, just drop it. */
if (jh->b_next_transaction == NULL) {
__jbd2_journal_unfile_buffer(jh);
- return;
+ return true;
}

/*
@@ -2544,6 +2547,7 @@ void __jbd2_journal_refile_buffer(struct

if (was_dirty)
set_buffer_jbddirty(bh);
+ return false;
}

/*
@@ -2555,15 +2559,18 @@ void __jbd2_journal_refile_buffer(struct
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ bool drop;

/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
- __jbd2_journal_refile_buffer(jh);
+ drop = __jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_list_lock);
__brelse(bh);
+ if (drop)
+ jbd2_journal_put_journal_head(jh);
}

/*
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1252,7 +1252,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM

/* Filing buffers */
extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
-extern void __jbd2_journal_refile_buffer(struct journal_head *);
+extern bool __jbd2_journal_refile_buffer(struct journal_head *);
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
extern void __journal_free_buffer(struct journal_head *bh);

@@ -0,0 +1,27 @@
From: Jan Kara <jack@suse.cz>
Date: Fri, 9 Aug 2019 14:42:30 +0200
Subject: [PATCH 4/7] jbd2: Drop unnecessary branch from jbd2_journal_forget()

We have cleared both dirty & jbddirty bits from the bh. So there's no
difference between bforget() and brelse(). Thus there's no point jumping
to no_jbd branch.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/transaction.c | 4 ----
1 file changed, 4 deletions(-)

--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1603,10 +1603,6 @@ int jbd2_journal_forget (handle_t *handl
} else {
__jbd2_journal_unfile_buffer(jh);
jbd2_journal_put_journal_head(jh);
- if (!buffer_jbd(bh)) {
- spin_unlock(&journal->j_list_lock);
- goto not_jbd;
- }
}
spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction) {

@@ -0,0 +1,58 @@
From: Jan Kara <jack@suse.cz>
Date: Fri, 9 Aug 2019 14:42:31 +0200
Subject: [PATCH 5/7] jbd2: Don't call __bforget() unnecessarily

jbd2_journal_forget() jumps to 'not_jbd' branch which calls __bforget()
in cases where the buffer is clean which is pointless. In case of failed
assertion, it can be even argued that it is safer not to touch buffer's
dirty bits. Also logically it makes more sense to just jump to 'drop'
and that will make logic also simpler when we switch bh_state_lock to a
spinlock.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/transaction.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1554,7 +1554,7 @@ int jbd2_journal_forget (handle_t *handl
if (!J_EXPECT_JH(jh, !jh->b_committed_data,
"inconsistent data on disk")) {
err = -EIO;
- goto not_jbd;
+ goto drop;
}

/* keep track of whether or not this transaction modified us */
@@ -1644,7 +1644,7 @@ int jbd2_journal_forget (handle_t *handl
if (!jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "belongs to none transaction");
spin_unlock(&journal->j_list_lock);
- goto not_jbd;
+ goto drop;
}

/*
@@ -1654,7 +1654,7 @@ int jbd2_journal_forget (handle_t *handl
if (!buffer_dirty(bh)) {
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
- goto not_jbd;
+ goto drop;
}

/*
@@ -1667,10 +1667,9 @@ int jbd2_journal_forget (handle_t *handl
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
spin_unlock(&journal->j_list_lock);
}
-
+drop:
jbd_unlock_bh_state(bh);
__brelse(bh);
-drop:
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
handle->h_buffer_credits++;

@@ -0,0 +1,675 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 9 Aug 2019 14:42:32 +0200
Subject: [PATCH 6/7] jbd2: Make state lock a spinlock

Bit-spinlocks are problematic on PREEMPT_RT if functions which might sleep
on RT, e.g. spin_lock(), alloc/free(), are invoked inside the lock held
region because bit spinlocks disable preemption even on RT.

A first attempt was to replace state lock with a spinlock placed in struct
buffer_head and make the locking conditional on PREEMPT_RT and
DEBUG_BIT_SPINLOCKS.

Jan pointed out that there is a 4 byte hole in struct journal_head where a
regular spinlock fits in and he would not object to convert the state lock
to a spinlock unconditionally.

Aside of solving the RT problem, this also gains lockdep coverage for the
journal head state lock (bit-spinlocks are not covered by lockdep as it's
hard to fit a lockdep map into a single bit).

The trivial change would have been to convert the jbd_*lock_bh_state()
inlines, but that comes with the downside that these functions take a
buffer head pointer which needs to be converted to a journal head pointer
which adds another level of indirection.

As almost all functions which use this lock have a journal head pointer
readily available, it makes more sense to remove the lock helper inlines
and write out spin_*lock() at all call sites.

Fixup all locking comments as well.

Suggested-by: Jan Kara <jack@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Jan Kara <jack@suse.com>
Cc: linux-ext4@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/commit.c | 8 +--
fs/jbd2/journal.c | 10 ++--
fs/jbd2/transaction.c | 100 ++++++++++++++++++++-----------------------
fs/ocfs2/suballoc.c | 19 ++++----
include/linux/jbd2.h | 20 --------
include/linux/journal-head.h | 21 ++++++---
6 files changed, 84 insertions(+), 94 deletions(-)

--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(jou
if (jh->b_committed_data) {
struct buffer_head *bh = jh2bh(jh);

- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
jbd2_free(jh->b_committed_data, bh->b_size);
jh->b_committed_data = NULL;
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
jbd2_journal_refile_buffer(journal, jh);
}
@@ -930,7 +930,7 @@ void jbd2_journal_commit_transaction(jou
* done with it.
*/
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);

/*
@@ -1030,7 +1030,7 @@ void jbd2_journal_commit_transaction(jou
}
JBUFFER_TRACE(jh, "refile or unfile buffer");
drop_ref = __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
if (drop_ref)
jbd2_journal_put_journal_head(jh);
if (try_to_free)
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(t
/* keep subsequent assertions sane */
atomic_set(&new_bh->b_count, 1);

- jbd_lock_bh_state(bh_in);
+ spin_lock(&jh_in->b_state_lock);
repeat:
/*
* If a new transaction has already done a buffer copy-out, then
@@ -405,13 +405,13 @@ int jbd2_journal_write_metadata_buffer(t
if (need_copy_out && !done_copy_out) {
char *tmp;

- jbd_unlock_bh_state(bh_in);
+ spin_unlock(&jh_in->b_state_lock);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
if (!tmp) {
brelse(new_bh);
return -ENOMEM;
}
- jbd_lock_bh_state(bh_in);
+ spin_lock(&jh_in->b_state_lock);
if (jh_in->b_frozen_data) {
jbd2_free(tmp, bh_in->b_size);
goto repeat;
@@ -464,7 +464,7 @@ int jbd2_journal_write_metadata_buffer(t
__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
spin_unlock(&journal->j_list_lock);
set_buffer_shadow(bh_in);
- jbd_unlock_bh_state(bh_in);
+ spin_unlock(&jh_in->b_state_lock);

return do_escape | (done_copy_out << 1);
}
@@ -2407,6 +2407,8 @@ static struct journal_head *journal_allo
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
+ if (ret)
+ spin_lock_init(&ret->b_state_lock);
return ret;
}

--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -877,7 +877,7 @@ do_get_write_access(handle_t *handle, st

start_lock = jiffies;
lock_buffer(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);

/* If it takes too long to lock the buffer, trace it */
time_lock = jbd2_time_diff(start_lock, jiffies);
@@ -927,7 +927,7 @@ do_get_write_access(handle_t *handle, st

error = -EROFS;
if (is_handle_aborted(handle)) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
goto out;
}
error = 0;
@@ -991,7 +991,7 @@ do_get_write_access(handle_t *handle, st
*/
if (buffer_shadow(bh)) {
JBUFFER_TRACE(jh, "on shadow: sleep");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
goto repeat;
}
@@ -1012,7 +1012,7 @@ do_get_write_access(handle_t *handle, st
JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS | __GFP_NOFAIL);
goto repeat;
@@ -1031,7 +1031,7 @@ do_get_write_access(handle_t *handle, st
jh->b_next_transaction = transaction;

done:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);

/*
* If we are about to journal a buffer, then any revoke pending on it is
@@ -1173,7 +1173,7 @@ int jbd2_journal_get_create_access(handl
* that case: the transaction must have deleted the buffer for it to be
* reused here.
*/
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
jh->b_transaction == NULL ||
(jh->b_transaction == journal->j_committing_transaction &&
@@ -1208,7 +1208,7 @@ int jbd2_journal_get_create_access(handl
jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);

/*
* akpm: I added this. ext3_alloc_branch can pick up new indirect
@@ -1279,13 +1279,13 @@ int jbd2_journal_get_undo_access(handle_
committed_data = jbd2_alloc(jh2bh(jh)->b_size,
GFP_NOFS|__GFP_NOFAIL);

- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (!jh->b_committed_data) {
/* Copy out the current buffer contents into the
* preserved, committed copy. */
JBUFFER_TRACE(jh, "generate b_committed data");
if (!committed_data) {
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
goto repeat;
}

@@ -1293,7 +1293,7 @@ int jbd2_journal_get_undo_access(handle_
committed_data = NULL;
memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
}
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
jbd2_journal_put_journal_head(jh);
if (unlikely(committed_data))
@@ -1394,16 +1394,16 @@ int jbd2_journal_dirty_metadata(handle_t
*/
if (jh->b_transaction != transaction &&
jh->b_next_transaction != transaction) {
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_next_transaction == transaction);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
if (jh->b_modified == 1) {
/* If it's in our transaction it must be in BJ_Metadata list. */
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata) {
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
if (jh->b_transaction == transaction &&
jh->b_jlist != BJ_Metadata)
pr_err("JBD2: assertion failure: h_type=%u "
@@ -1413,13 +1413,13 @@ int jbd2_journal_dirty_metadata(handle_t
jh->b_jlist);
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
jh->b_jlist == BJ_Metadata);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
}
goto out;
}

journal = transaction->t_journal;
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);

if (jh->b_modified == 0) {
/*
@@ -1505,7 +1505,7 @@ int jbd2_journal_dirty_metadata(handle_t
__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
spin_unlock(&journal->j_list_lock);
out_unlock_bh:
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
out:
JBUFFER_TRACE(jh, "exit");
return ret;
@@ -1543,11 +1543,13 @@ int jbd2_journal_forget (handle_t *handl

BUFFER_TRACE(bh, "entry");

- jbd_lock_bh_state(bh);
+ jh = jbd2_journal_grab_journal_head(bh);
+ if (!jh) {
+ __bforget(bh);
+ return 0;
+ }

- if (!buffer_jbd(bh))
- goto not_jbd;
- jh = bh2jh(bh);
+ spin_lock(&jh->b_state_lock);

/* Critical error: attempting to delete a bitmap buffer, maybe?
* Don't do any jbd operations, and return an error. */
@@ -1668,18 +1670,14 @@ int jbd2_journal_forget (handle_t *handl
spin_unlock(&journal->j_list_lock);
}
drop:
- jbd_unlock_bh_state(bh);
__brelse(bh);
+ spin_unlock(&jh->b_state_lock);
+ jbd2_journal_put_journal_head(jh);
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
handle->h_buffer_credits++;
}
return err;
-
-not_jbd:
- jbd_unlock_bh_state(bh);
- __bforget(bh);
- goto drop;
}

/**
@@ -1878,7 +1876,7 @@ int jbd2_journal_stop(handle_t *handle)
*
* j_list_lock is held.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/

static inline void
@@ -1902,7 +1900,7 @@ static inline void
*
* Called with j_list_lock held, and the journal may not be locked.
*
- * jbd_lock_bh_state(jh2bh(jh)) is held.
+ * jh->b_state_lock is held.
*/

static inline void
@@ -1934,7 +1932,7 @@ static void __jbd2_journal_temp_unlink_b
transaction_t *transaction;
struct buffer_head *bh = jh2bh(jh);

- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
transaction = jh->b_transaction;
if (transaction)
assert_spin_locked(&transaction->t_journal->j_list_lock);
@@ -1988,11 +1986,11 @@ void jbd2_journal_unfile_buffer(journal_

/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
__brelse(bh);
}
@@ -2000,7 +1998,7 @@ void jbd2_journal_unfile_buffer(journal_
/*
* Called from jbd2_journal_try_to_free_buffers().
*
- * Called under jbd_lock_bh_state(bh)
+ * Called under jh->b_state_lock
*/
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
@@ -2087,10 +2085,10 @@ int jbd2_journal_try_to_free_buffers(jou
if (!jh)
continue;

- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
__journal_try_to_free_buffer(journal, bh);
+ spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
- jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
} while ((bh = bh->b_this_page) != head);
@@ -2111,7 +2109,7 @@ int jbd2_journal_try_to_free_buffers(jou
*
* Called under j_list_lock.
*
- * Called under jbd_lock_bh_state(bh).
+ * Called under jh->b_state_lock.
*/
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
@@ -2205,7 +2203,7 @@ static int journal_unmap_buffer(journal_

/* OK, we have data buffer in journaled mode */
write_lock(&journal->j_state_lock);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);

/*
@@ -2286,10 +2284,10 @@ static int journal_unmap_buffer(journal_
* for commit and try again.
*/
if (partial_page) {
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
return -EBUSY;
}
/*
@@ -2303,10 +2301,10 @@ static int journal_unmap_buffer(journal_
if (journal->j_running_transaction && buffer_jbddirty(bh))
jh->b_next_transaction = journal->j_running_transaction;
jh->b_modified = 0;
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
return 0;
} else {
/* Good, the buffer belongs to the running transaction.
@@ -2330,10 +2328,10 @@ static int journal_unmap_buffer(journal_
* here.
*/
jh->b_modified = 0;
- jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
write_unlock(&journal->j_state_lock);
+ jbd2_journal_put_journal_head(jh);
zap_buffer_unlocked:
clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2420,7 +2418,7 @@ void __jbd2_journal_file_buffer(struct j
int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh);

- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
assert_spin_locked(&transaction->t_journal->j_list_lock);

J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
@@ -2482,11 +2480,11 @@ void __jbd2_journal_file_buffer(struct j
void jbd2_journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
{
- jbd_lock_bh_state(jh2bh(jh));
+ spin_lock(&jh->b_state_lock);
spin_lock(&transaction->t_journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, jlist);
spin_unlock(&transaction->t_journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ spin_unlock(&jh->b_state_lock);
}

/*
@@ -2496,7 +2494,7 @@ void jbd2_journal_file_buffer(struct jou
* buffer on that transaction's metadata list.
*
* Called under j_list_lock
- * Called under jbd_lock_bh_state(jh2bh(jh))
+ * Called under jh->b_state_lock
*
* When this function returns true, there's no next transaction to refile to
* and the caller has to drop jh reference through
@@ -2507,7 +2505,7 @@ bool __jbd2_journal_refile_buffer(struct
int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);

- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
+ lockdep_assert_held(&jh->b_state_lock);
if (jh->b_transaction)
assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

@@ -2553,17 +2551,13 @@ bool __jbd2_journal_refile_buffer(struct
*/
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
- struct buffer_head *bh = jh2bh(jh);
bool drop;

- /* Get reference so that buffer cannot be freed before we unlock it */
- get_bh(bh);
- jbd_lock_bh_state(bh);
+ spin_lock(&jh->b_state_lock);
spin_lock(&journal->j_list_lock);
drop = __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
+ spin_unlock(&jh->b_state_lock);
spin_unlock(&journal->j_list_lock);
- __brelse(bh);
if (drop)
jbd2_journal_put_journal_head(jh);
}
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable
int nr)
{
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
+ struct journal_head *jh;
int ret;

if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
@@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable
if (!buffer_jbd(bg_bh))
return 1;

- jbd_lock_bh_state(bg_bh);
- bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
+ jh = bh2jh(bg_bh);
+ spin_lock(&jh->b_state_lock);
+ bg = (struct ocfs2_group_desc *) jh->b_committed_data;
if (bg)
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
else
ret = 1;
- jbd_unlock_bh_state(bg_bh);
+ spin_unlock(&jh->b_state_lock);

return ret;
}
@@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(
int status;
unsigned int tmp;
struct ocfs2_group_desc *undo_bg = NULL;
+ struct journal_head *jh;

/* The caller got this descriptor from
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(
goto bail;
}

+ jh = bh2jh(group_bh);
if (undo_fn) {
- jbd_lock_bh_state(group_bh);
- undo_bg = (struct ocfs2_group_desc *)
- bh2jh(group_bh)->b_committed_data;
+ spin_lock(&jh->b_state_lock);
+ undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
BUG_ON(!undo_bg);
}

@@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
if (undo_fn)
- jbd_unlock_bh_state(group_bh);
+ spin_unlock(&jh->b_state_lock);
return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
(unsigned long long)le64_to_cpu(bg->bg_blkno),
le16_to_cpu(bg->bg_bits),
@@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(
}

if (undo_fn)
- jbd_unlock_bh_state(group_bh);
+ spin_unlock(&jh->b_state_lock);

ocfs2_journal_dirty(handle, group_bh);
bail:
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -313,7 +313,6 @@ enum jbd_state_bits {
BH_Revoked, /* Has been revoked from the log */
BH_RevokeValid, /* Revoked flag is valid */
BH_JBDDirty, /* Is dirty but journaled */
- BH_State, /* Pins most journal_head state */
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
BH_Shadow, /* IO on shadow buffer is running */
BH_Verified, /* Metadata block has been verified ok */
@@ -342,21 +341,6 @@ static inline struct journal_head *bh2jh
return bh->b_private;
}

-static inline void jbd_lock_bh_state(struct buffer_head *bh)
-{
- bit_spin_lock(BH_State, &bh->b_state);
-}
-
-static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
-{
- return bit_spin_is_locked(BH_State, &bh->b_state);
-}
-
-static inline void jbd_unlock_bh_state(struct buffer_head *bh)
-{
- bit_spin_unlock(BH_State, &bh->b_state);
-}
-
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
bit_spin_lock(BH_JournalHead, &bh->b_state);
@@ -551,9 +535,9 @@ struct transaction_chp_stats_s {
* ->jbd_lock_bh_journal_head() (This is "innermost")
*
* j_state_lock
- * ->jbd_lock_bh_state()
+ * ->b_state_lock
*
- * jbd_lock_bh_state()
+ * b_state_lock
* ->j_list_lock
*
* j_state_lock
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -11,6 +11,8 @@
#ifndef JOURNAL_HEAD_H_INCLUDED
#define JOURNAL_HEAD_H_INCLUDED

+#include <linux/spinlock.h>
+
typedef unsigned int tid_t; /* Unique transaction ID */
typedef struct transaction_s transaction_t; /* Compound transaction type */

@@ -24,13 +26,18 @@ struct journal_head {
struct buffer_head *b_bh;

/*
+ * Protect the buffer head state
+ */
+ spinlock_t b_state_lock;
+
+ /*
* Reference count - see description in journal.c
* [jbd_lock_bh_journal_head()]
*/
int b_jcount;

/*
- * Journalling list for this buffer [jbd_lock_bh_state()]
+ * Journalling list for this buffer [b_state_lock]
* NOTE: We *cannot* combine this with b_modified into a bitfield
* as gcc would then (which the C standard allows but which is
* very unuseful) make 64-bit accesses to the bitfield and clobber
@@ -41,20 +48,20 @@ struct journal_head {
/*
* This flag signals the buffer has been modified by
* the currently running transaction
- * [jbd_lock_bh_state()]
+ * [b_state_lock]
*/
unsigned b_modified;

/*
* Copy of the buffer data frozen for writing to the log.
- * [jbd_lock_bh_state()]
+ * [b_state_lock]
*/
char *b_frozen_data;

/*
* Pointer to a saved copy of the buffer containing no uncommitted
* deallocation references, so that allocations can avoid overwriting
- * uncommitted deletes. [jbd_lock_bh_state()]
+ * uncommitted deletes. [b_state_lock]
*/
char *b_committed_data;

@@ -63,7 +70,7 @@ struct journal_head {
* metadata: either the running transaction or the committing
* transaction (if there is one). Only applies to buffers on a
* transaction's data or metadata journaling list.
- * [j_list_lock] [jbd_lock_bh_state()]
+ * [j_list_lock] [b_state_lock]
* Either of these locks is enough for reading, both are needed for
* changes.
*/
@@ -73,13 +80,13 @@ struct journal_head {
* Pointer to the running compound transaction which is currently
* modifying the buffer's metadata, if there was already a transaction
* committing it when the new transaction touched it.
- * [t_list_lock] [jbd_lock_bh_state()]
+ * [t_list_lock] [b_state_lock]
*/
transaction_t *b_next_transaction;

/*
* Doubly-linked list of buffers on a transaction's data, metadata or
- * forget queue. [t_list_lock] [jbd_lock_bh_state()]
+ * forget queue. [t_list_lock] [b_state_lock]
*/
struct journal_head *b_tnext, *b_tprev;

@@ -0,0 +1,88 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 9 Aug 2019 14:42:33 +0200
Subject: [PATCH 7/7] jbd2: Free journal head outside of locked region

On PREEMPT_RT bit-spinlocks have the same semantics as on PREEMPT_RT=n,
i.e. they disable preemption. That means functions which are not safe to be
called in preempt disabled context on RT trigger a might_sleep() assert.

The journal head bit spinlock is mostly held for short code sequences with
trivial RT safe functionality, except for one place:

jbd2_journal_put_journal_head() invokes __journal_remove_journal_head()
with the journal head bit spinlock held. __journal_remove_journal_head()
invokes kmem_cache_free() which must not be called with preemption disabled
on RT.

Jan suggested to rework the removal function so the actual free happens
outside the bit-spinlocked region.

Split it into two parts:

- Do the sanity checks and the buffer head detach under the lock

- Do the actual free after dropping the lock

There is error case handling in the free part which needs to dereference
the b_size field of the now detached buffer head. Due to paranoia (caused
by ignorance) the size is retrieved in the detach function and handed into
the free function. Might be over-engineered, but better safe than sorry.

This makes the journal head bit-spinlock usage RT compliant and also avoids
nested locking which is not covered by lockdep.

Suggested-by: Jan Kara <jack@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-ext4@vger.kernel.org
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/jbd2/journal.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)

--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2528,17 +2528,23 @@ static void __journal_remove_journal_hea
J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head");
+
+ /* Unlink before dropping the lock */
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+}
+
+static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
+{
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
- jbd2_free(jh->b_frozen_data, bh->b_size);
+ jbd2_free(jh->b_frozen_data, b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
- jbd2_free(jh->b_committed_data, bh->b_size);
+ jbd2_free(jh->b_committed_data, b_size);
}
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
journal_free_journal_head(jh);
}

@@ -2556,9 +2562,11 @@ void jbd2_journal_put_journal_head(struc
if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
+ journal_release_journal_head(jh, bh->b_size);
__brelse(bh);
- } else
+ } else {
jbd_unlock_bh_journal_head(bh);
+ }
}

/*

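The split described above (detach the object while the lock is held, release the memory only after the lock is dropped) is a general pattern. A minimal userspace sketch with a pthread spinlock, using illustrative names rather than the jbd2 functions:

/* Minimal sketch of "detach under the lock, free outside it"; not kernel code. */
#include <pthread.h>
#include <stdlib.h>

struct node {
    struct node *next;
    char *payload;
};

static pthread_spinlock_t list_lock;
static struct node *head;

static void remove_and_free(struct node *victim)
{
    struct node **pp;

    pthread_spin_lock(&list_lock);
    /* Unlink while the lock is held, mirroring __journal_remove_journal_head()
     * which now only detaches the journal head. */
    for (pp = &head; *pp; pp = &(*pp)->next) {
        if (*pp == victim) {
            *pp = victim->next;
            break;
        }
    }
    pthread_spin_unlock(&list_lock);

    /* The actual release happens after the lock is dropped, mirroring the new
     * journal_release_journal_head(); on PREEMPT_RT the allocator may sleep,
     * so it must not run inside the preemption-disabling locked region. */
    free(victim->payload);
    free(victim);
}

int main(void)
{
    pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);

    struct node *n = calloc(1, sizeof(*n));
    n->payload = malloc(16);
    n->next = head;
    head = n;

    remove_and_free(n);
    pthread_spin_destroy(&list_lock);
    return 0;
}
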
@@ -0,0 +1,86 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 17 Oct 2019 12:19:02 +0200
Subject: [PATCH] x86/ioapic: Rename misnamed functions

ioapic_irqd_[un]mask() are misnomers as both functions do way more than
masking and unmasking the interrupt line. Both deal with the moving the
affinity of the interrupt within interrupt context. The mask/unmask is just
a tiny part of the functionality.

Rename them to ioapic_prepare/finish_move(), fixup the call sites and
rename the related variables in the code to reflect what this is about.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Link: https://lkml.kernel.org/r/20191017101938.412489856@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kernel/apic/io_apic.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1725,7 +1725,7 @@ static bool io_apic_level_ack_pending(st
return false;
}

-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
/* If we are moving the IRQ we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
@@ -1736,9 +1736,9 @@ static inline bool ioapic_irqd_mask(stru
return false;
}

-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
- if (unlikely(masked)) {
+ if (unlikely(moveit)) {
/* Only migrate the irq if the ack has been received.
*
* On rare occasions the broadcast level triggered ack gets
@@ -1773,11 +1773,11 @@ static inline void ioapic_irqd_unmask(st
}
}
#else
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
}
#endif
@@ -1786,11 +1786,11 @@ static void ioapic_ack_level(struct irq_
{
struct irq_cfg *cfg = irqd_cfg(irq_data);
unsigned long v;
- bool masked;
+ bool moveit;
int i;

irq_complete_move(cfg);
- masked = ioapic_irqd_mask(irq_data);
+ moveit = ioapic_prepare_move(irq_data);

/*
* It appears there is an erratum which affects at least version 0x11
@@ -1845,7 +1845,7 @@ static void ioapic_ack_level(struct irq_
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
}

- ioapic_irqd_unmask(irq_data, masked);
+ ioapic_finish_move(irq_data, moveit);
}

static void ioapic_ir_ack_level(struct irq_data *irq_data)

@@ -0,0 +1,100 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 4 Sep 2019 17:59:36 +0200
Subject: [PATCH] percpu-refcount: use normal instead of RCU-sched"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a revert of commit
a4244454df129 ("percpu-refcount: use RCU-sched insted of normal RCU")

which claims the only reason for using RCU-sched is
"rcu_read_[un]lock() … are slightly more expensive than preempt_disable/enable()"

and
"As the RCU critical sections are extremely short, using sched-RCU
shouldn't have any latency implications."

The problem with RCU-sched is that it disables preemption and the
callback must not acquire any sleeping locks like spinlock_t on
PREEMPT_RT which is the case.

Convert back to normal RCU.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/percpu-refcount.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -186,14 +186,14 @@ static inline void percpu_ref_get_many(s
{
unsigned long __percpu *percpu_count;

- rcu_read_lock_sched();
+ rcu_read_lock();

if (__ref_is_percpu(ref, &percpu_count))
this_cpu_add(*percpu_count, nr);
else
atomic_long_add(nr, &ref->count);

- rcu_read_unlock_sched();
+ rcu_read_unlock();
}

/**
@@ -223,7 +223,7 @@ static inline bool percpu_ref_tryget(str
unsigned long __percpu *percpu_count;
bool ret;

- rcu_read_lock_sched();
+ rcu_read_lock();

if (__ref_is_percpu(ref, &percpu_count)) {
this_cpu_inc(*percpu_count);
@@ -232,7 +232,7 @@ static inline bool percpu_ref_tryget(str
ret = atomic_long_inc_not_zero(&ref->count);
}

- rcu_read_unlock_sched();
+ rcu_read_unlock();

return ret;
}
@@ -257,7 +257,7 @@ static inline bool percpu_ref_tryget_liv
unsigned long __percpu *percpu_count;
bool ret = false;

- rcu_read_lock_sched();
+ rcu_read_lock();

if (__ref_is_percpu(ref, &percpu_count)) {
this_cpu_inc(*percpu_count);
@@ -266,7 +266,7 @@ static inline bool percpu_ref_tryget_liv
ret = atomic_long_inc_not_zero(&ref->count);
}

- rcu_read_unlock_sched();
+ rcu_read_unlock();

return ret;
}
@@ -285,14 +285,14 @@ static inline void percpu_ref_put_many(s
{
unsigned long __percpu *percpu_count;

- rcu_read_lock_sched();
+ rcu_read_lock();

if (__ref_is_percpu(ref, &percpu_count))
this_cpu_sub(*percpu_count, nr);
else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
ref->release(ref);

- rcu_read_unlock_sched();
+ rcu_read_unlock();
}

/**

@@ -0,0 +1,70 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 10 Apr 2019 11:01:37 +0200
Subject: [PATCH] drm/i915: Don't disable interrupts independently of the
 lock

The locks (active.lock and rq->lock) need to be taken with disabled
interrupts. This is done in i915_request_retire() by disabling the
interrupts independently of the locks itself.
While local_irq_disable()+spin_lock() equals spin_lock_irq() on vanilla
it does not on PREEMPT_RT.
Chris Wilson confirmed that local_irq_disable() was just introduced as
an optimisation to avoid enabling/disabling interrupts during
lock/unlock combo.

Enable/disable interrupts as part of the locking instruction.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/gpu/drm/i915/i915_request.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)

--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -205,14 +205,14 @@ static void remove_from_engine(struct i9
* check that the rq still belongs to the newly locked engine.
*/
locked = READ_ONCE(rq->engine);
- spin_lock(&locked->active.lock);
+ spin_lock_irq(&locked->active.lock);
while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
spin_unlock(&locked->active.lock);
spin_lock(&engine->active.lock);
locked = engine;
}
list_del(&rq->sched.link);
- spin_unlock(&locked->active.lock);
+ spin_unlock_irq(&locked->active.lock);
}

static bool i915_request_retire(struct i915_request *rq)
@@ -272,8 +272,6 @@ static bool i915_request_retire(struct i
active->retire(active, rq);
}

- local_irq_disable();
-
/*
* We only loosely track inflight requests across preemption,
* and so we may find ourselves attempting to retire a _completed_
@@ -282,7 +280,7 @@ static bool i915_request_retire(struct i
*/
remove_from_engine(rq);

- spin_lock(&rq->lock);
+ spin_lock_irq(&rq->lock);
i915_request_mark_complete(rq);
if (!i915_request_signaled(rq))
dma_fence_signal_locked(&rq->fence);
@@ -297,9 +295,7 @@ static bool i915_request_retire(struct i
__notify_execute_cb(rq);
}
GEM_BUG_ON(!list_empty(&rq->execute_cb));
- spin_unlock(&rq->lock);
-
- local_irq_enable();
+ spin_unlock_irq(&rq->lock);

remove_from_client(rq);
list_del(&rq->link);

@@ -0,0 +1,35 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Nov 2019 21:37:22 +0100
Subject: [PATCH] block: Don't disable interrupts in trigger_softirq()

trigger_softirq() is always invoked as a SMP-function call which is
always invoked with disables interrupts.

Don't disable interrupt in trigger_softirq() because interrupts are
already disabled.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
block/blk-softirq.c | 4 ----
1 file changed, 4 deletions(-)

--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -42,17 +42,13 @@ static __latent_entropy void blk_done_so
static void trigger_softirq(void *data)
{
struct request *rq = data;
- unsigned long flags;
struct list_head *list;

- local_irq_save(flags);
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&rq->ipi_list, list);

if (list->next == &rq->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
- local_irq_restore(flags);
}

/*

@@ -0,0 +1,89 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 26 Jul 2018 09:13:42 +0200
Subject: [PATCH] arm64: KVM: Invoke compute_layout() before alternatives are
 applied

compute_layout() is invoked as part of an alternative fixup under
stop_machine(). This function invokes get_random_long() which acquires a
sleeping lock on -RT which can not be acquired in this context.

Rename compute_layout() to kvm_compute_layout() and invoke it before
stop_machine() applies the alternatives. Add a __init prefix to
kvm_compute_layout() because the caller has it, too (and so the code can be
discarded after boot).

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm64/include/asm/kvm_mmu.h | 1 +
arch/arm64/kernel/smp.c | 4 ++++
arch/arm64/kvm/va_layout.c | 8 +-------
3 files changed, 6 insertions(+), 7 deletions(-)

--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -91,6 +91,7 @@ alternative_cb_end

void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
+void kvm_compute_layout(void);

static inline unsigned long __kern_hyp_va(unsigned long v)
{
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -31,6 +31,7 @@
#include <linux/of.h>
#include <linux/irq_work.h>
#include <linux/kexec.h>
+#include <linux/kvm_host.h>

#include <asm/alternative.h>
#include <asm/atomic.h>
@@ -39,6 +40,7 @@
#include <asm/cputype.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
+#include <asm/kvm_mmu.h>
#include <asm/mmu_context.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
@@ -408,6 +410,8 @@ static void __init hyp_mode_check(void)
"CPU: CPUs started in inconsistent modes");
else
pr_info("CPU: All CPU(s) started at EL1\n");
+ if (IS_ENABLED(CONFIG_KVM_ARM_HOST))
+ kvm_compute_layout();
}

void __init smp_cpus_done(unsigned int max_cpus)
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -22,7 +22,7 @@ static u8 tag_lsb;
static u64 tag_val;
static u64 va_mask;

-static void compute_layout(void)
+__init void kvm_compute_layout(void)
{
phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
u64 hyp_va_msb;
@@ -110,9 +110,6 @@ void __init kvm_update_va_mask(struct al

BUG_ON(nr_inst != 5);

- if (!has_vhe() && !va_mask)
- compute_layout();
-
for (i = 0; i < nr_inst; i++) {
u32 rd, rn, insn, oinsn;

@@ -156,9 +153,6 @@ void kvm_patch_vector_branch(struct alt_
return;
}

- if (!va_mask)
- compute_layout();
-
/*
* Compute HYP VA by using the same computation as kern_hyp_va()
*/

@ -0,0 +1,57 @@
|
||||
From: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Date: Wed, 5 Mar 2014 00:49:47 +0100
|
||||
Subject: net: sched: Use msleep() instead of yield()
|
||||
|
||||
On PREEMPT_RT enabled systems the interrupt handlers run as threads at prio 50
|
||||
(by default). If a high priority userspace process tries to shut down a busy
|
||||
network interface it might spin in a yield loop waiting for the device to
|
||||
become idle. With the interrupt thread having a lower priority than the
|
||||
looping process it might never be scheduled and so result in a deadlock on UP
|
||||
systems.
|
||||
|
||||
With Magic SysRq the following backtrace can be produced:
|
||||
|
||||
> test_app R running 0 174 168 0x00000000
|
||||
> [<c02c7070>] (__schedule+0x220/0x3fc) from [<c02c7870>] (preempt_schedule_irq+0x48/0x80)
|
||||
> [<c02c7870>] (preempt_schedule_irq+0x48/0x80) from [<c0008fa8>] (svc_preempt+0x8/0x20)
|
||||
> [<c0008fa8>] (svc_preempt+0x8/0x20) from [<c001a984>] (local_bh_enable+0x18/0x88)
|
||||
> [<c001a984>] (local_bh_enable+0x18/0x88) from [<c025316c>] (dev_deactivate_many+0x220/0x264)
|
||||
> [<c025316c>] (dev_deactivate_many+0x220/0x264) from [<c023be04>] (__dev_close_many+0x64/0xd4)
|
||||
> [<c023be04>] (__dev_close_many+0x64/0xd4) from [<c023be9c>] (__dev_close+0x28/0x3c)
|
||||
> [<c023be9c>] (__dev_close+0x28/0x3c) from [<c023f7f0>] (__dev_change_flags+0x88/0x130)
|
||||
> [<c023f7f0>] (__dev_change_flags+0x88/0x130) from [<c023f904>] (dev_change_flags+0x10/0x48)
|
||||
> [<c023f904>] (dev_change_flags+0x10/0x48) from [<c024c140>] (do_setlink+0x370/0x7ec)
|
||||
> [<c024c140>] (do_setlink+0x370/0x7ec) from [<c024d2f0>] (rtnl_newlink+0x2b4/0x450)
|
||||
> [<c024d2f0>] (rtnl_newlink+0x2b4/0x450) from [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4)
|
||||
> [<c024cfa0>] (rtnetlink_rcv_msg+0x158/0x1f4) from [<c0256740>] (netlink_rcv_skb+0xac/0xc0)
|
||||
> [<c0256740>] (netlink_rcv_skb+0xac/0xc0) from [<c024bbd8>] (rtnetlink_rcv+0x18/0x24)
|
||||
> [<c024bbd8>] (rtnetlink_rcv+0x18/0x24) from [<c02561b8>] (netlink_unicast+0x13c/0x198)
|
||||
> [<c02561b8>] (netlink_unicast+0x13c/0x198) from [<c025651c>] (netlink_sendmsg+0x264/0x2e0)
|
||||
> [<c025651c>] (netlink_sendmsg+0x264/0x2e0) from [<c022af98>] (sock_sendmsg+0x78/0x98)
|
||||
> [<c022af98>] (sock_sendmsg+0x78/0x98) from [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278)
|
||||
> [<c022bb50>] (___sys_sendmsg.part.25+0x268/0x278) from [<c022cf08>] (__sys_sendmsg+0x48/0x78)
|
||||
> [<c022cf08>] (__sys_sendmsg+0x48/0x78) from [<c0009320>] (ret_fast_syscall+0x0/0x2c)
|
||||
|
||||
This patch works around the problem by replacing yield() by msleep(1), giving
|
||||
the interrupt thread time to finish, similar to other changes contained in the
|
||||
rt patch set. Using wait_for_completion() instead would probably be a better
|
||||
solution.
|
||||
|
||||
|
||||
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
net/sched/sch_generic.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -1215,7 +1215,7 @@ void dev_deactivate_many(struct list_hea
|
||||
/* Wait for outstanding qdisc_run calls. */
|
||||
list_for_each_entry(dev, head, close_list) {
|
||||
while (some_qdisc_is_busy(dev))
|
||||
- yield();
|
||||
+ msleep(1);
|
||||
/* The new qdisc is assigned at this point so we can safely
|
||||
* unwind stale skb lists and qdisc statistics
|
||||
*/
|
@ -0,0 +1,105 @@
|
||||
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
|
||||
Date: Sat, 30 Nov 2019 17:54:33 -0800
|
||||
Subject: [PATCH] mm/vmalloc: remove preempt_disable/enable when doing
|
||||
preloading
|
||||
|
||||
Some background: preemption was previously disabled to guarantee that a
|
||||
preloaded object is available for the CPU it was stored for. That was
|
||||
achieved by combining disabling preemption with taking the spin
|
||||
lock while the ne_fit_preload_node is checked.
|
||||
|
||||
The aim was to not allocate in atomic context when spinlock is taken
|
||||
later, for regular vmap allocations. But that approach conflicts with
|
||||
CONFIG_PREEMPT_RT philosophy. It means that calling spin_lock() with
|
||||
disabled preemption is forbidden in the CONFIG_PREEMPT_RT kernel.
|
||||
|
||||
Therefore, get rid of preempt_disable() and preempt_enable() when the
|
||||
preload is done for splitting purpose. As a result we do not guarantee
|
||||
now that a CPU is preloaded, instead we minimize the case when it is
|
||||
not, with this change, by populating the per cpu preload pointer under
|
||||
the vmap_area_lock.
|
||||
|
||||
This implies that at least each caller that has done the preallocation
|
||||
will not fallback to an atomic allocation later. It is possible that
|
||||
the preallocation would be pointless or that no preallocation is done
|
||||
because of the race but the data shows that this is really rare.
|
||||
|
||||
For example, I ran a special test case that follows the preload pattern
|
||||
and path. 20 "unbind" threads run it and each does 1000000 allocations.
|
||||
On average, only 3.5 times per 1000000 allocations was a CPU not preloaded. So it can happen
|
||||
but the number is negligible.
|
||||
|
||||
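A rough sketch of the resulting pattern (my_obj, my_cachep, my_preload and
my_lock are made-up names, not identifiers from mm/vmalloc.c): preload outside
the lock, install under the lock, and free the extra object if the slot was
already populated:

    #include <linux/percpu.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct my_obj;
    static struct kmem_cache *my_cachep;
    static DEFINE_SPINLOCK(my_lock);
    static DEFINE_PER_CPU(struct my_obj *, my_preload);

    static void my_alloc_path(int node)
    {
            struct my_obj *pre = NULL;

            /* Preload in non-atomic context so GFP_KERNEL can be used. */
            if (!this_cpu_read(my_preload))
                    pre = kmem_cache_alloc_node(my_cachep, GFP_KERNEL, node);

            spin_lock(&my_lock);

            /* Install the preloaded object; drop it if the slot was filled
             * meanwhile (e.g. after migrating to another CPU).
             */
            if (pre && this_cpu_cmpxchg(my_preload, NULL, pre))
                    kmem_cache_free(my_cachep, pre);

            /* ... allocation work that may consume this CPU's preload ... */

            spin_unlock(&my_lock);
    }
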
[mhocko@suse.com: changelog additions]
|
||||
Link: http://lkml.kernel.org/r/20191016095438.12391-1-urezki@gmail.com
|
||||
Fixes: 82dd23e84be3 ("mm/vmalloc.c: preload a CPU with one object for split purpose")
|
||||
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
|
||||
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
|
||||
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Acked-by: Daniel Wagner <dwagner@suse.de>
|
||||
Acked-by: Michal Hocko <mhocko@suse.com>
|
||||
Cc: Hillf Danton <hdanton@sina.com>
|
||||
Cc: Matthew Wilcox <willy@infradead.org>
|
||||
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
||||
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/vmalloc.c | 37 ++++++++++++++++++++-----------------
|
||||
1 file changed, 20 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/mm/vmalloc.c
|
||||
+++ b/mm/vmalloc.c
|
||||
@@ -1077,31 +1077,34 @@ static struct vmap_area *alloc_vmap_area
|
||||
|
||||
retry:
|
||||
/*
|
||||
- * Preload this CPU with one extra vmap_area object to ensure
|
||||
- * that we have it available when fit type of free area is
|
||||
- * NE_FIT_TYPE.
|
||||
+ * Preload this CPU with one extra vmap_area object. It is used
|
||||
+ * when fit type of free area is NE_FIT_TYPE. Please note, it
|
||||
+ * does not guarantee that an allocation occurs on a CPU that
|
||||
+ * is preloaded, instead we minimize the case when it is not.
|
||||
+ * It can happen because of cpu migration, because there is a
|
||||
+ * race until the below spinlock is taken.
|
||||
*
|
||||
* The preload is done in non-atomic context, thus it allows us
|
||||
* to use more permissive allocation masks to be more stable under
|
||||
- * low memory condition and high memory pressure.
|
||||
+ * low memory condition and high memory pressure. In rare case,
|
||||
+ * if not preloaded, GFP_NOWAIT is used.
|
||||
*
|
||||
- * Even if it fails we do not really care about that. Just proceed
|
||||
- * as it is. "overflow" path will refill the cache we allocate from.
|
||||
+ * Set "pva" to NULL here, because of "retry" path.
|
||||
*/
|
||||
- preempt_disable();
|
||||
- if (!__this_cpu_read(ne_fit_preload_node)) {
|
||||
- preempt_enable();
|
||||
- pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
|
||||
- preempt_disable();
|
||||
+ pva = NULL;
|
||||
|
||||
- if (__this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva)) {
|
||||
- if (pva)
|
||||
- kmem_cache_free(vmap_area_cachep, pva);
|
||||
- }
|
||||
- }
|
||||
+ if (!this_cpu_read(ne_fit_preload_node))
|
||||
+ /*
|
||||
+ * Even if it fails we do not really care about that.
|
||||
+ * Just proceed as it is. If needed "overflow" path
|
||||
+ * will refill the cache we allocate from.
|
||||
+ */
|
||||
+ pva = kmem_cache_alloc_node(vmap_area_cachep, GFP_KERNEL, node);
|
||||
|
||||
spin_lock(&vmap_area_lock);
|
||||
- preempt_enable();
|
||||
+
|
||||
+ if (pva && __this_cpu_cmpxchg(ne_fit_preload_node, NULL, pva))
|
||||
+ kmem_cache_free(vmap_area_cachep, pva);
|
||||
|
||||
/*
|
||||
* If an allocation fails, the "vend" address is
|
@ -0,0 +1,46 @@
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Tue, 13 Aug 2019 14:29:41 +0200
|
||||
Subject: [PATCH] KVM: arm/arm64: Let the timer expire in hardirq context
|
||||
on RT
|
||||
|
||||
The timers are canceled from a preempt-notifier which is invoked with
|
||||
preemption disabled, which is not allowed on PREEMPT_RT.
|
||||
The timer callback is short so it could be invoked in hard-IRQ context
|
||||
on -RT.
|
||||
|
||||
Let the timer expire on hard-IRQ context even on -RT.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Acked-by: Marc Zyngier <maz@kernel.org>
|
||||
Tested-by: Julien Grall <julien.grall@arm.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
virt/kvm/arm/arch_timer.c | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/virt/kvm/arm/arch_timer.c
|
||||
+++ b/virt/kvm/arm/arch_timer.c
|
||||
@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(str
|
||||
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
|
||||
{
|
||||
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
|
||||
- HRTIMER_MODE_ABS);
|
||||
+ HRTIMER_MODE_ABS_HARD);
|
||||
}
|
||||
|
||||
static void soft_timer_cancel(struct hrtimer *hrt)
|
||||
@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu
|
||||
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
|
||||
ptimer->cntvoff = 0;
|
||||
|
||||
- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
|
||||
timer->bg_timer.function = kvm_bg_timer_expire;
|
||||
|
||||
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
+ hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
|
||||
+ hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
|
||||
vtimer->hrtimer.function = kvm_hrtimer_expire;
|
||||
ptimer->hrtimer.function = kvm_hrtimer_expire;
|
||||
|
@ -0,0 +1,55 @@
|
||||
From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
|
||||
Date: Mon, 9 Mar 2020 18:15:29 +0000
|
||||
Subject: [PATCH] time/sched_clock: Expire timer in hardirq context
|
||||
|
||||
To minimize latency, PREEMPT_RT kernels expire hrtimers in preemptible
|
||||
softirq context by default. This can be overridden by marking the timer's
|
||||
expiry with HRTIMER_MODE_HARD.
|
||||
|
||||
sched_clock_timer is missing this annotation: if its callback is preempted
|
||||
and the duration of the preemption exceeds the wrap around time of the
|
||||
underlying clocksource, sched clock will get out of sync.
|
||||
|
||||
Mark the sched_clock_timer for expiry in hard interrupt context.
|
||||
|
||||
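A small illustrative sketch (my_timer and the 10ms period are made up, not
from this patch) of how a timer opts out of softirq expiry on PREEMPT_RT by
using the _HARD mode variants:

    #include <linux/hrtimer.h>
    #include <linux/ktime.h>

    static struct hrtimer my_timer;

    static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
    {
            /* Runs in hard interrupt context even on PREEMPT_RT; keep it short. */
            return HRTIMER_NORESTART;
    }

    static void my_timer_setup(void)
    {
            hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
            my_timer.function = my_timer_fn;
            hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL_HARD);
    }
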
Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Link: https://lkml.kernel.org/r/20200309181529.26558-1-a.darwish@linutronix.de
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/time/sched_clock.c | 9 +++++----
|
||||
1 file changed, 5 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/kernel/time/sched_clock.c
|
||||
+++ b/kernel/time/sched_clock.c
|
||||
@@ -207,7 +207,8 @@ sched_clock_register(u64 (*read)(void),
|
||||
|
||||
if (sched_clock_timer.function != NULL) {
|
||||
/* update timeout for clock wrap */
|
||||
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
|
||||
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt,
|
||||
+ HRTIMER_MODE_REL_HARD);
|
||||
}
|
||||
|
||||
r = rate;
|
||||
@@ -251,9 +252,9 @@ void __init generic_sched_clock_init(voi
|
||||
* Start the timer to keep sched_clock() properly updated and
|
||||
* sets the initial epoch.
|
||||
*/
|
||||
- hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
+ hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
|
||||
sched_clock_timer.function = sched_clock_poll;
|
||||
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
|
||||
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -290,7 +291,7 @@ void sched_clock_resume(void)
|
||||
struct clock_read_data *rd = &cd.read_data[0];
|
||||
|
||||
rd->epoch_cyc = cd.actual_read_sched_clock();
|
||||
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
|
||||
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD);
|
||||
rd->read_sched_clock = cd.actual_read_sched_clock;
|
||||
}
|
||||
|
@ -0,0 +1,393 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:39 +0100
|
||||
Subject: [PATCH 01/25] printk-rb: add printk ring buffer documentation
|
||||
|
||||
The full documentation file for the printk ring buffer.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/printk-ringbuffer.txt | 377 ++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 377 insertions(+)
|
||||
create mode 100644 Documentation/printk-ringbuffer.txt
|
||||
|
||||
--- /dev/null
|
||||
+++ b/Documentation/printk-ringbuffer.txt
|
||||
@@ -0,0 +1,377 @@
|
||||
+struct printk_ringbuffer
|
||||
+------------------------
|
||||
+John Ogness <john.ogness@linutronix.de>
|
||||
+
|
||||
+Overview
|
||||
+~~~~~~~~
|
||||
+As the name suggests, this ring buffer was implemented specifically to serve
|
||||
+the needs of the printk() infrastructure. The ring buffer itself is not
|
||||
+specific to printk and could be used for other purposes. _However_, the
|
||||
+requirements and semantics of printk are rather unique. If you intend to use
|
||||
+this ring buffer for anything other than printk, you need to be very clear on
|
||||
+its features, behavior, and pitfalls.
|
||||
+
|
||||
+Features
|
||||
+^^^^^^^^
|
||||
+The printk ring buffer has the following features:
|
||||
+
|
||||
+- single global buffer
|
||||
+- resides in initialized data section (available at early boot)
|
||||
+- lockless readers
|
||||
+- supports multiple writers
|
||||
+- supports multiple non-consuming readers
|
||||
+- safe from any context (including NMI)
|
||||
+- groups bytes into variable length blocks (referenced by entries)
|
||||
+- entries tagged with sequence numbers
|
||||
+
|
||||
+Behavior
|
||||
+^^^^^^^^
|
||||
+Since the printk ring buffer readers are lockless, there exists no
|
||||
+synchronization between readers and writers. Basically writers are the tasks
|
||||
+in control and may overwrite any and all committed data at any time and from
|
||||
+any context. For this reason readers can miss entries if they are overwritten
|
||||
+before the reader was able to access the data. The reader API implementation
|
||||
+is such that reader access to entries is atomic, so there is no risk of
|
||||
+readers having to deal with partial or corrupt data. Also, entries are
|
||||
+tagged with sequence numbers so readers can recognize if entries were missed.
|
||||
+
|
||||
+Writing to the ring buffer consists of 2 steps. First a writer must reserve
|
||||
+an entry of desired size. After this step the writer has exclusive access
|
||||
+to the memory region. Once the data has been written to memory, it needs to
|
||||
+be committed to the ring buffer. After this step the entry has been inserted
|
||||
+into the ring buffer and assigned an appropriate sequence number.
|
||||
+
|
||||
+Once committed, a writer must no longer access the data directly. This is
|
||||
+because the data may have been overwritten and no longer exists. If a
|
||||
+writer must access the data, it should either keep a private copy before
|
||||
+committing the entry or use the reader API to gain access to the data.
|
||||
+
|
||||
+Because of how the data backend is implemented, entries that have been
|
||||
+reserved but not yet committed act as barriers, preventing future writers
|
||||
+from filling the ring buffer beyond the location of the reserved but not
|
||||
+yet committed entry region. For this reason it is *important* that writers
|
||||
+perform both reserve and commit as quickly as possible. Also, be aware that
|
||||
+preemption and local interrupts are disabled and writing to the ring buffer
|
||||
+is processor-reentrant locked during the reserve/commit window. Writers in
|
||||
+NMI contexts can still preempt any other writers, but as long as these
|
||||
+writers do not write a large amount of data with respect to the ring buffer
|
||||
+size, this should not become an issue.
|
||||
+
|
||||
+API
|
||||
+~~~
|
||||
+
|
||||
+Declaration
|
||||
+^^^^^^^^^^^
|
||||
+The printk ring buffer can be instantiated as a static structure:
|
||||
+
|
||||
+ /* declare a static struct printk_ringbuffer */
|
||||
+ #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)
|
||||
+
|
||||
+The value of szbits specifies the size of the ring buffer in bits. The
|
||||
+cpulockptr field is a pointer to a prb_cpulock struct that is used to
|
||||
+perform processor-reentrant spin locking for the writers. It is specified
|
||||
+externally because it may be used for multiple ring buffers (or other
|
||||
+code) to synchronize writers without risk of deadlock.
|
||||
+
|
||||
+Here is an example of a declaration of a printk ring buffer specifying a
|
||||
+32KB (2^15) ring buffer:
|
||||
+
|
||||
+....
|
||||
+DECLARE_STATIC_PRINTKRB_CPULOCK(rb_cpulock);
|
||||
+DECLARE_STATIC_PRINTKRB(rb, 15, &rb_cpulock);
|
||||
+....
|
||||
+
|
||||
+If writers will be using multiple ring buffers and the ordering of that usage
|
||||
+is not clear, the same prb_cpulock should be used for both ring buffers.
|
||||
+
|
||||
+Writer API
|
||||
+^^^^^^^^^^
|
||||
+The writer API consists of 2 functions. The first is to reserve an entry in
|
||||
+the ring buffer, the second is to commit that data to the ring buffer. The
|
||||
+reserved entry information is stored within a provided `struct prb_handle`.
|
||||
+
|
||||
+ /* reserve an entry */
|
||||
+ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
|
||||
+ unsigned int size);
|
||||
+
|
||||
+ /* commit a reserved entry to the ring buffer */
|
||||
+ void prb_commit(struct prb_handle *h);
|
||||
+
|
||||
+Here is an example of a function to write data to a ring buffer:
|
||||
+
|
||||
+....
|
||||
+int write_data(struct printk_ringbuffer *rb, char *data, int size)
|
||||
+{
|
||||
+ struct prb_handle h;
|
||||
+ char *buf;
|
||||
+
|
||||
+ buf = prb_reserve(&h, rb, size);
|
||||
+ if (!buf)
|
||||
+ return -1;
|
||||
+ memcpy(buf, data, size);
|
||||
+ prb_commit(&h);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+....
|
||||
+
|
||||
+Pitfalls
|
||||
+++++++++
|
||||
+Be aware that prb_reserve() can fail. A retry might be successful, but it
|
||||
+depends entirely on whether or not the next part of the ring buffer to
|
||||
+overwrite belongs to reserved but not yet committed entries of other writers.
|
||||
+Writers can use the prb_inc_lost() function to allow readers to notice that a
|
||||
+message was lost.
|
||||
+
|
||||
+Reader API
|
||||
+^^^^^^^^^^
|
||||
+The reader API utilizes a `struct prb_iterator` to track the reader's
|
||||
+position in the ring buffer.
|
||||
+
|
||||
+ /* declare a pre-initialized static iterator for a ring buffer */
|
||||
+ #define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr)
|
||||
+
|
||||
+ /* initialize iterator for a ring buffer (if static macro NOT used) */
|
||||
+ void prb_iter_init(struct prb_iterator *iter,
|
||||
+ struct printk_ringbuffer *rb, u64 *seq);
|
||||
+
|
||||
+ /* make a deep copy of an iterator */
|
||||
+ void prb_iter_copy(struct prb_iterator *dest,
|
||||
+ struct prb_iterator *src);
|
||||
+
|
||||
+ /* non-blocking, advance to next entry (and read the data) */
|
||||
+ int prb_iter_next(struct prb_iterator *iter, char *buf,
|
||||
+ int size, u64 *seq);
|
||||
+
|
||||
+ /* blocking, advance to next entry (and read the data) */
|
||||
+ int prb_iter_wait_next(struct prb_iterator *iter, char *buf,
|
||||
+ int size, u64 *seq);
|
||||
+
|
||||
+ /* position iterator at the entry seq */
|
||||
+ int prb_iter_seek(struct prb_iterator *iter, u64 seq);
|
||||
+
|
||||
+ /* read data at current position */
|
||||
+ int prb_iter_data(struct prb_iterator *iter, char *buf,
|
||||
+ int size, u64 *seq);
|
||||
+
|
||||
+Typically prb_iter_data() is not needed because the data can be retrieved
|
||||
+directly with prb_iter_next().
|
||||
+
|
||||
+Here is an example of a non-blocking function that will read all the data in
|
||||
+a ring buffer:
|
||||
+
|
||||
+....
|
||||
+void read_all_data(struct printk_ringbuffer *rb, char *buf, int size)
|
||||
+{
|
||||
+ struct prb_iterator iter;
|
||||
+ u64 prev_seq = 0;
|
||||
+ u64 seq;
|
||||
+ int ret;
|
||||
+
|
||||
+ prb_iter_init(&iter, rb, NULL);
|
||||
+
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_next(&iter, buf, size, &seq);
|
||||
+ if (ret > 0) {
|
||||
+ if (seq != ++prev_seq) {
|
||||
+ /* "seq - prev_seq" entries missed */
|
||||
+ prev_seq = seq;
|
||||
+ }
|
||||
+ /* process buf here */
|
||||
+ } else if (ret == 0) {
|
||||
+ /* hit the end, done */
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ /*
|
||||
+ * iterator is invalid, a writer overtook us, reset the
|
||||
+ * iterator and keep going, entries were missed
|
||||
+ */
|
||||
+ prb_iter_init(&iter, rb, NULL);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+....
|
||||
+
|
||||
+Pitfalls
|
||||
+++++++++
|
||||
+The reader's iterator can become invalid at any time because the reader was
|
||||
+overtaken by a writer. Typically the reader should reset the iterator back
|
||||
+to the current oldest entry (which will be newer than the entry the reader
|
||||
+was at) and continue, noting the number of entries that were missed.
|
||||
+
|
||||
+Utility API
|
||||
+^^^^^^^^^^^
|
||||
+Several functions are available as convenience for external code.
|
||||
+
|
||||
+ /* query the size of the data buffer */
|
||||
+ int prb_buffer_size(struct printk_ringbuffer *rb);
|
||||
+
|
||||
+ /* skip a seq number to signify a lost record */
|
||||
+ void prb_inc_lost(struct printk_ringbuffer *rb);
|
||||
+
|
||||
+ /* processor-reentrant spin lock */
|
||||
+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
+
|
||||
+ /* processor-reentrant spin unlock */
|
||||
+ void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
||||
+
|
||||
+Pitfalls
|
||||
+++++++++
|
||||
+Although the value returned by prb_buffer_size() does represent an absolute
|
||||
+upper bound, the amount of data that can be stored within the ring buffer
|
||||
+is actually less because of the additional storage space of a header for each
|
||||
+entry.
|
||||
+
|
||||
+The prb_lock() and prb_unlock() functions can be used to synchronize between
|
||||
+ring buffer writers and other external activities. The function of a
|
||||
+processor-reentrant spin lock is to disable preemption and local interrupts
|
||||
+and synchronize against other processors. It does *not* protect against
|
||||
+multiple contexts of a single processor, i.e. NMI.
|
||||
+
|
||||
+Implementation
|
||||
+~~~~~~~~~~~~~~
|
||||
+This section describes several of the implementation concepts and details to
|
||||
+help developers better understand the code.
|
||||
+
|
||||
+Entries
|
||||
+^^^^^^^
|
||||
+All ring buffer data is stored within a single static byte array. The reason
|
||||
+for this is to ensure that any pointers to the data (past and present) will
|
||||
+always point to valid memory. This is important because the lockless readers
|
||||
+may be preempted for long periods of time and when they resume may be working
|
||||
+with expired pointers.
|
||||
+
|
||||
+Entries are identified by start index and size. (The start index plus size
|
||||
+is the start index of the next entry.) The start index is not simply an
|
||||
+offset into the byte array, but rather a logical position (lpos) that maps
|
||||
+directly to byte array offsets.
|
||||
+
|
||||
+For example, for a byte array of 1000, an entry may have have a start index
|
||||
+of 100. Another entry may have a start index of 1100. And yet another 2100.
|
||||
+All of these entries point to the same memory region, but only the most
|
||||
+recent entry is valid. The other entries are pointing to valid memory, but
|
||||
+represent entries that have been overwritten.
|
||||
+
|
||||
+Note that due to overflowing, the most recent entry is not necessarily the one
|
||||
+with the highest lpos value. Indeed, the printk ring buffer initializes its
|
||||
+data such that an overflow happens relatively quickly in order to validate the
|
||||
+handling of this situation. The implementation assumes that an lpos (unsigned
|
||||
+long) will never completely wrap while a reader is preempted. If this were to
|
||||
+become an issue, the seq number (which never wraps) could be used to increase
|
||||
+the robustness of handling this situation.
|
||||
+
|
||||
+Buffer Wrapping
|
||||
+^^^^^^^^^^^^^^^
|
||||
+If an entry starts near the end of the byte array but would extend beyond it,
|
||||
+a special terminating entry (size = -1) is inserted into the byte array and
|
||||
+the real entry is placed at the beginning of the byte array. This can waste
|
||||
+space at the end of the byte array, but simplifies the implementation by
|
||||
+allowing writers to always work with contiguous buffers.
|
||||
+
|
||||
+Note that the size field is the first 4 bytes of the entry header. Also note
|
||||
+that calc_next() always ensures that there are at least 4 bytes left at the
|
||||
+end of the byte array to allow room for a terminating entry.
|
||||
+
|
||||
+Ring Buffer Pointers
|
||||
+^^^^^^^^^^^^^^^^^^^^
|
||||
+Three pointers (lpos values) are used to manage the ring buffer:
|
||||
+
|
||||
+ - _tail_: points to the oldest entry
|
||||
+ - _head_: points to where the next new committed entry will be
|
||||
+ - _reserve_: points to where the next new reserved entry will be
|
||||
+
|
||||
+These pointers always maintain a logical ordering:
|
||||
+
|
||||
+ tail <= head <= reserve
|
||||
+
|
||||
+The reserve pointer moves forward when a writer reserves a new entry. The
|
||||
+head pointer moves forward when a writer commits a new entry.
|
||||
+
|
||||
+The reserve pointer cannot overwrite the tail pointer in a wrap situation. In
|
||||
+such a situation, the tail pointer must be "pushed forward", thus
|
||||
+invalidating that oldest entry. Readers identify if they are accessing a
|
||||
+valid entry by ensuring their entry pointer is `>= tail && < head`.
|
||||
+
|
||||
+If the tail pointer is equal to the head pointer, it cannot be pushed and any
|
||||
+reserve operation will fail. The only resolution is for writers to commit
|
||||
+their reserved entries.
|
||||
+
|
||||
+Processor-Reentrant Locking
|
||||
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
+The purpose of the processor-reentrant locking is to limit the interruption
|
||||
+scenarios of writers to 2 contexts. This allows for a simplified
|
||||
+implementation where:
|
||||
+
|
||||
+- The reserve/commit window only exists on 1 processor at a time. A reserve
|
||||
+ can never fail due to uncommitted entries of other processors.
|
||||
+
|
||||
+- When committing entries, it is trivial to handle the situation when
|
||||
+ subsequent entries have already been committed, i.e. managing the head
|
||||
+ pointer.
|
||||
+
|
||||
+Performance
|
||||
+~~~~~~~~~~~
|
||||
+Some basic tests were performed on a quad Intel(R) Xeon(R) CPU E5-2697 v4 at
|
||||
+2.30GHz (36 cores / 72 threads). All tests involved writing a total of
|
||||
+32,000,000 records at an average of 33 bytes each. Each writer was pinned to
|
||||
+its own CPU and would write as fast as it could until a total of 32,000,000
|
||||
+records were written. All tests involved 2 readers that were both pinned
|
||||
+together to another CPU. Each reader would read as fast as it could and track
|
||||
+how many of the 32,000,000 records it could read. All tests used a ring buffer
|
||||
+of 16KB in size, which holds around 350 records (header + data for each
|
||||
+entry).
|
||||
+
|
||||
+The only difference between the tests is the number of writers (and thus also
|
||||
+the number of records per writer). As more writers are added, the time to
|
||||
+write a record increases. This is because data pointers, modified via cmpxchg,
|
||||
+and global data access in general become more contended.
|
||||
+
|
||||
+1 writer
|
||||
+^^^^^^^^
|
||||
+ runtime: 0m 18s
|
||||
+ reader1: 16219900/32000000 (50%) records
|
||||
+ reader2: 16141582/32000000 (50%) records
|
||||
+
|
||||
+2 writers
|
||||
+^^^^^^^^^
|
||||
+ runtime: 0m 32s
|
||||
+ reader1: 16327957/32000000 (51%) records
|
||||
+ reader2: 16313988/32000000 (50%) records
|
||||
+
|
||||
+4 writers
|
||||
+^^^^^^^^^
|
||||
+ runtime: 0m 42s
|
||||
+ reader1: 16421642/32000000 (51%) records
|
||||
+ reader2: 16417224/32000000 (51%) records
|
||||
+
|
||||
+8 writers
|
||||
+^^^^^^^^^
|
||||
+ runtime: 0m 43s
|
||||
+ reader1: 16418300/32000000 (51%) records
|
||||
+ reader2: 16432222/32000000 (51%) records
|
||||
+
|
||||
+16 writers
|
||||
+^^^^^^^^^^
|
||||
+ runtime: 0m 54s
|
||||
+ reader1: 16539189/32000000 (51%) records
|
||||
+ reader2: 16542711/32000000 (51%) records
|
||||
+
|
||||
+32 writers
|
||||
+^^^^^^^^^^
|
||||
+ runtime: 1m 13s
|
||||
+ reader1: 16731808/32000000 (52%) records
|
||||
+ reader2: 16735119/32000000 (52%) records
|
||||
+
|
||||
+Comments
|
||||
+^^^^^^^^
|
||||
+It is particularly interesting to compare/contrast the 1-writer and 32-writer
|
||||
+tests. Despite the writing of the 32,000,000 records taking over 4 times
|
||||
+longer, the readers (which perform no cmpxchg) were still unable to keep up.
|
||||
+This shows that the memory contention between the increasing number of CPUs
|
||||
+also has a dramatic effect on readers.
|
||||
+
|
||||
+It should also be noted that in all cases each reader was able to read >=50%
|
||||
+of the records. This means that a single reader would have been able to keep
|
||||
+up with the writer(s) in all cases, becoming slightly easier as more writers
|
||||
+are added. This was the purpose of pinning 2 readers to 1 CPU: to observe how
|
||||
+maximum reader performance changes.
|
@ -0,0 +1,158 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:40 +0100
|
||||
Subject: [PATCH 02/25] printk-rb: add prb locking functions
|
||||
|
||||
Add processor-reentrant spin locking functions. These allow
|
||||
restricting the number of possible contexts to 2, which can simplify
|
||||
implementing code that also supports NMI interruptions.
|
||||
|
||||
prb_lock();
|
||||
|
||||
/*
|
||||
* This code is synchronized with all contexts
|
||||
* except an NMI on the same processor.
|
||||
*/
|
||||
|
||||
prb_unlock();
|
||||
|
||||
In order to support printk's emergency messages, a
|
||||
processor-reentrant spin lock will be used to control raw access to
|
||||
the emergency console. However, it must be the same
|
||||
processor-reentrant spin lock as the one used by the ring buffer,
|
||||
otherwise a deadlock can occur:
|
||||
|
||||
CPU1: printk lock -> emergency -> serial lock
|
||||
CPU2: serial lock -> printk lock
|
||||
|
||||
By making the processor-reentrant implementation available externally,
|
||||
printk can use the same atomic_t for the ring buffer as for the
|
||||
emergency console and thus avoid the above deadlock.
|
||||
|
||||
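For reference, a sketch of the full calling convention (the snippet above
omits the arguments; my_cpulock and the writer function are illustrative,
the prototypes are the ones added by this patch):

    DECLARE_STATIC_PRINTKRB_CPULOCK(my_cpulock);

    static void my_writer_side(void)
    {
            unsigned int cpu_store;

            prb_lock(&my_cpulock, &cpu_store);

            /*
             * Synchronized with all contexts except an NMI arriving on
             * this processor; preemption and local interrupts are disabled.
             */

            prb_unlock(&my_cpulock, cpu_store);
    }
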
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/printk_ringbuffer.h | 24 +++++++++++
|
||||
lib/Makefile | 2
|
||||
lib/printk_ringbuffer.c | 77 ++++++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 102 insertions(+), 1 deletion(-)
|
||||
create mode 100644 include/linux/printk_ringbuffer.h
|
||||
create mode 100644 lib/printk_ringbuffer.c
|
||||
|
||||
--- /dev/null
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* SPDX-License-Identifier: GPL-2.0 */
|
||||
+#ifndef _LINUX_PRINTK_RINGBUFFER_H
|
||||
+#define _LINUX_PRINTK_RINGBUFFER_H
|
||||
+
|
||||
+#include <linux/atomic.h>
|
||||
+#include <linux/percpu.h>
|
||||
+
|
||||
+struct prb_cpulock {
|
||||
+ atomic_t owner;
|
||||
+ unsigned long __percpu *irqflags;
|
||||
+};
|
||||
+
|
||||
+#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
|
||||
+static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
|
||||
+static struct prb_cpulock name = { \
|
||||
+ .owner = ATOMIC_INIT(-1), \
|
||||
+ .irqflags = &_##name##_percpu_irqflags, \
|
||||
+}
|
||||
+
|
||||
+/* utility functions */
|
||||
+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
||||
+
|
||||
+#endif /*_LINUX_PRINTK_RINGBUFFER_H */
|
||||
--- a/lib/Makefile
|
||||
+++ b/lib/Makefile
|
||||
@@ -26,7 +26,7 @@ endif
|
||||
|
||||
lib-y := ctype.o string.o vsprintf.o cmdline.o \
|
||||
rbtree.o radix-tree.o timerqueue.o xarray.o \
|
||||
- idr.o extable.o \
|
||||
+ idr.o extable.o printk_ringbuffer.o \
|
||||
sha1.o chacha.o irq_regs.o argv_split.o \
|
||||
flex_proportions.o ratelimit.o show_mem.o \
|
||||
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
|
||||
--- /dev/null
|
||||
+++ b/lib/printk_ringbuffer.c
|
||||
@@ -0,0 +1,77 @@
|
||||
+// SPDX-License-Identifier: GPL-2.0
|
||||
+#include <linux/smp.h>
|
||||
+#include <linux/printk_ringbuffer.h>
|
||||
+
|
||||
+static bool __prb_trylock(struct prb_cpulock *cpu_lock,
|
||||
+ unsigned int *cpu_store)
|
||||
+{
|
||||
+ unsigned long *flags;
|
||||
+ unsigned int cpu;
|
||||
+
|
||||
+ cpu = get_cpu();
|
||||
+
|
||||
+ *cpu_store = atomic_read(&cpu_lock->owner);
|
||||
+ /* memory barrier to ensure the current lock owner is visible */
|
||||
+ smp_rmb();
|
||||
+ if (*cpu_store == -1) {
|
||||
+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
|
||||
+ local_irq_save(*flags);
|
||||
+ if (atomic_try_cmpxchg_acquire(&cpu_lock->owner,
|
||||
+ cpu_store, cpu)) {
|
||||
+ return true;
|
||||
+ }
|
||||
+ local_irq_restore(*flags);
|
||||
+ } else if (*cpu_store == cpu) {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ put_cpu();
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_lock: Perform a processor-reentrant spin lock.
|
||||
+ * @cpu_lock: A pointer to the lock object.
|
||||
+ * @cpu_store: A "flags" pointer to store lock status information.
|
||||
+ *
|
||||
+ * If no processor has the lock, the calling processor takes the lock and
|
||||
+ * becomes the owner. If the calling processor is already the owner of the
|
||||
+ * lock, this function succeeds immediately. If lock is locked by another
|
||||
+ * processor, this function spins until the calling processor becomes the
|
||||
+ * owner.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store)
|
||||
+{
|
||||
+ for (;;) {
|
||||
+ if (__prb_trylock(cpu_lock, cpu_store))
|
||||
+ break;
|
||||
+ cpu_relax();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_unlock: Perform a processor-reentrant spin unlock.
|
||||
+ * @cpu_lock: A pointer to the lock object.
|
||||
+ * @cpu_store: A "flags" object storing lock status information.
|
||||
+ *
|
||||
+ * Release the lock. The calling processor must be the owner of the lock.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store)
|
||||
+{
|
||||
+ unsigned long *flags;
|
||||
+ unsigned int cpu;
|
||||
+
|
||||
+ cpu = atomic_read(&cpu_lock->owner);
|
||||
+ atomic_set_release(&cpu_lock->owner, cpu_store);
|
||||
+
|
||||
+ if (cpu_store == -1) {
|
||||
+ flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
|
||||
+ local_irq_restore(*flags);
|
||||
+ }
|
||||
+
|
||||
+ put_cpu();
|
||||
+}
|
@ -0,0 +1,57 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:41 +0100
|
||||
Subject: [PATCH 03/25] printk-rb: define ring buffer struct and initializer
|
||||
|
||||
See Documentation/printk-ringbuffer.txt for details about the
|
||||
initializer arguments.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/printk_ringbuffer.h | 28 ++++++++++++++++++++++++++++
|
||||
1 file changed, 28 insertions(+)
|
||||
|
||||
--- a/include/linux/printk_ringbuffer.h
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -10,6 +10,20 @@ struct prb_cpulock {
|
||||
unsigned long __percpu *irqflags;
|
||||
};
|
||||
|
||||
+struct printk_ringbuffer {
|
||||
+ void *buffer;
|
||||
+ unsigned int size_bits;
|
||||
+
|
||||
+ u64 seq;
|
||||
+
|
||||
+ atomic_long_t tail;
|
||||
+ atomic_long_t head;
|
||||
+ atomic_long_t reserve;
|
||||
+
|
||||
+ struct prb_cpulock *cpulock;
|
||||
+ atomic_t ctx;
|
||||
+};
|
||||
+
|
||||
#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
|
||||
static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
|
||||
static struct prb_cpulock name = { \
|
||||
@@ -17,6 +31,20 @@ static struct prb_cpulock name = { \
|
||||
.irqflags = &_##name##_percpu_irqflags, \
|
||||
}
|
||||
|
||||
+#define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \
|
||||
+static char _##name##_buffer[1 << (szbits)] \
|
||||
+ __aligned(__alignof__(long)); \
|
||||
+static struct printk_ringbuffer name = { \
|
||||
+ .buffer = &_##name##_buffer[0], \
|
||||
+ .size_bits = szbits, \
|
||||
+ .seq = 0, \
|
||||
+ .tail = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
+ .head = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
+ .reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
+ .cpulock = cpulockptr, \
|
||||
+ .ctx = ATOMIC_INIT(0), \
|
||||
+}
|
||||
+
|
||||
/* utility functions */
|
||||
void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
@ -0,0 +1,233 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:42 +0100
|
||||
Subject: [PATCH 04/25] printk-rb: add writer interface
|
||||
|
||||
Add the writer functions prb_reserve() and prb_commit(). These make
|
||||
use of processor-reentrant spin locks to limit the number of possible
|
||||
interruption scenarios for the writers.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/printk_ringbuffer.h | 17 +++
|
||||
lib/printk_ringbuffer.c | 172 ++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 189 insertions(+)
|
||||
|
||||
--- a/include/linux/printk_ringbuffer.h
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -24,6 +24,18 @@ struct printk_ringbuffer {
|
||||
atomic_t ctx;
|
||||
};
|
||||
|
||||
+struct prb_entry {
|
||||
+ unsigned int size;
|
||||
+ u64 seq;
|
||||
+ char data[0];
|
||||
+};
|
||||
+
|
||||
+struct prb_handle {
|
||||
+ struct printk_ringbuffer *rb;
|
||||
+ unsigned int cpu;
|
||||
+ struct prb_entry *entry;
|
||||
+};
|
||||
+
|
||||
#define DECLARE_STATIC_PRINTKRB_CPULOCK(name) \
|
||||
static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags); \
|
||||
static struct prb_cpulock name = { \
|
||||
@@ -45,6 +57,11 @@ static struct printk_ringbuffer name = {
|
||||
.ctx = ATOMIC_INIT(0), \
|
||||
}
|
||||
|
||||
+/* writer interface */
|
||||
+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
|
||||
+ unsigned int size);
|
||||
+void prb_commit(struct prb_handle *h);
|
||||
+
|
||||
/* utility functions */
|
||||
void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
||||
--- a/lib/printk_ringbuffer.c
|
||||
+++ b/lib/printk_ringbuffer.c
|
||||
@@ -2,6 +2,14 @@
|
||||
#include <linux/smp.h>
|
||||
#include <linux/printk_ringbuffer.h>
|
||||
|
||||
+#define PRB_SIZE(rb) (1 << rb->size_bits)
|
||||
+#define PRB_SIZE_BITMASK(rb) (PRB_SIZE(rb) - 1)
|
||||
+#define PRB_INDEX(rb, lpos) (lpos & PRB_SIZE_BITMASK(rb))
|
||||
+#define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits)
|
||||
+#define PRB_WRAP_LPOS(rb, lpos, xtra) \
|
||||
+ ((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits)
|
||||
+#define PRB_DATA_ALIGN sizeof(long)
|
||||
+
|
||||
static bool __prb_trylock(struct prb_cpulock *cpu_lock,
|
||||
unsigned int *cpu_store)
|
||||
{
|
||||
@@ -75,3 +83,167 @@ void prb_unlock(struct prb_cpulock *cpu_
|
||||
|
||||
put_cpu();
|
||||
}
|
||||
+
|
||||
+static struct prb_entry *to_entry(struct printk_ringbuffer *rb,
|
||||
+ unsigned long lpos)
|
||||
+{
|
||||
+ char *buffer = rb->buffer;
|
||||
+ buffer += PRB_INDEX(rb, lpos);
|
||||
+ return (struct prb_entry *)buffer;
|
||||
+}
|
||||
+
|
||||
+static int calc_next(struct printk_ringbuffer *rb, unsigned long tail,
|
||||
+ unsigned long lpos, int size, unsigned long *calced_next)
|
||||
+{
|
||||
+ unsigned long next_lpos;
|
||||
+ int ret = 0;
|
||||
+again:
|
||||
+ next_lpos = lpos + size;
|
||||
+ if (next_lpos - tail > PRB_SIZE(rb))
|
||||
+ return -1;
|
||||
+
|
||||
+ if (PRB_WRAPS(rb, lpos) != PRB_WRAPS(rb, next_lpos)) {
|
||||
+ lpos = PRB_WRAP_LPOS(rb, next_lpos, 0);
|
||||
+ ret |= 1;
|
||||
+ goto again;
|
||||
+ }
|
||||
+
|
||||
+ *calced_next = next_lpos;
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+static bool push_tail(struct printk_ringbuffer *rb, unsigned long tail)
|
||||
+{
|
||||
+ unsigned long new_tail;
|
||||
+ struct prb_entry *e;
|
||||
+ unsigned long head;
|
||||
+
|
||||
+ if (tail != atomic_long_read(&rb->tail))
|
||||
+ return true;
|
||||
+
|
||||
+ e = to_entry(rb, tail);
|
||||
+ if (e->size != -1)
|
||||
+ new_tail = tail + e->size;
|
||||
+ else
|
||||
+ new_tail = PRB_WRAP_LPOS(rb, tail, 1);
|
||||
+
|
||||
+ /* make sure the new tail does not overtake the head */
|
||||
+ head = atomic_long_read(&rb->head);
|
||||
+ if (head - new_tail > PRB_SIZE(rb))
|
||||
+ return false;
|
||||
+
|
||||
+ atomic_long_cmpxchg(&rb->tail, tail, new_tail);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_commit: Commit a reserved entry to the ring buffer.
|
||||
+ * @h: An entry handle referencing the data entry to commit.
|
||||
+ *
|
||||
+ * Commit data that has been reserved using prb_reserve(). Once the data
|
||||
+ * block has been committed, it can be invalidated at any time. If a writer
|
||||
+ * is interested in using the data after committing, the writer should make
|
||||
+ * its own copy first or use the prb_iter_ reader functions to access the
|
||||
+ * data in the ring buffer.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+void prb_commit(struct prb_handle *h)
|
||||
+{
|
||||
+ struct printk_ringbuffer *rb = h->rb;
|
||||
+ struct prb_entry *e;
|
||||
+ unsigned long head;
|
||||
+ unsigned long res;
|
||||
+
|
||||
+ for (;;) {
|
||||
+ if (atomic_read(&rb->ctx) != 1) {
|
||||
+ /* the interrupted context will fixup head */
|
||||
+ atomic_dec(&rb->ctx);
|
||||
+ break;
|
||||
+ }
|
||||
+ /* assign sequence numbers before moving head */
|
||||
+ head = atomic_long_read(&rb->head);
|
||||
+ res = atomic_long_read(&rb->reserve);
|
||||
+ while (head != res) {
|
||||
+ e = to_entry(rb, head);
|
||||
+ if (e->size == -1) {
|
||||
+ head = PRB_WRAP_LPOS(rb, head, 1);
|
||||
+ continue;
|
||||
+ }
|
||||
+ e->seq = ++rb->seq;
|
||||
+ head += e->size;
|
||||
+ }
|
||||
+ atomic_long_set_release(&rb->head, res);
|
||||
+ atomic_dec(&rb->ctx);
|
||||
+
|
||||
+ if (atomic_long_read(&rb->reserve) == res)
|
||||
+ break;
|
||||
+ atomic_inc(&rb->ctx);
|
||||
+ }
|
||||
+
|
||||
+ prb_unlock(rb->cpulock, h->cpu);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_reserve: Reserve an entry within a ring buffer.
|
||||
+ * @h: An entry handle to be setup and reference an entry.
|
||||
+ * @rb: A ring buffer to reserve data within.
|
||||
+ * @size: The number of bytes to reserve.
|
||||
+ *
|
||||
+ * Reserve an entry of at least @size bytes to be used by the caller. If
|
||||
+ * successful, the data region of the entry belongs to the caller and cannot
|
||||
+ * be invalidated by any other task/context. For this reason, the caller
|
||||
+ * should call prb_commit() as quickly as possible in order to avoid preventing
|
||||
+ * other tasks/contexts from reserving data in the case that the ring buffer
|
||||
+ * has wrapped.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ *
|
||||
+ * Returns a pointer to the reserved entry (and @h is setup to reference that
|
||||
+ * entry) or NULL if it was not possible to reserve data.
|
||||
+ */
|
||||
+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
|
||||
+ unsigned int size)
|
||||
+{
|
||||
+ unsigned long tail, res1, res2;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (size == 0)
|
||||
+ return NULL;
|
||||
+ size += sizeof(struct prb_entry);
|
||||
+ size += PRB_DATA_ALIGN - 1;
|
||||
+ size &= ~(PRB_DATA_ALIGN - 1);
|
||||
+ if (size >= PRB_SIZE(rb))
|
||||
+ return NULL;
|
||||
+
|
||||
+ h->rb = rb;
|
||||
+ prb_lock(rb->cpulock, &h->cpu);
|
||||
+
|
||||
+ atomic_inc(&rb->ctx);
|
||||
+
|
||||
+ do {
|
||||
+ for (;;) {
|
||||
+ tail = atomic_long_read(&rb->tail);
|
||||
+ res1 = atomic_long_read(&rb->reserve);
|
||||
+ ret = calc_next(rb, tail, res1, size, &res2);
|
||||
+ if (ret >= 0)
|
||||
+ break;
|
||||
+ if (!push_tail(rb, tail)) {
|
||||
+ prb_commit(h);
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ }
|
||||
+ } while (!atomic_long_try_cmpxchg_acquire(&rb->reserve, &res1, res2));
|
||||
+
|
||||
+ h->entry = to_entry(rb, res1);
|
||||
+
|
||||
+ if (ret) {
|
||||
+ /* handle wrap */
|
||||
+ h->entry->size = -1;
|
||||
+ h->entry = to_entry(rb, PRB_WRAP_LPOS(rb, res2, 0));
|
||||
+ }
|
||||
+
|
||||
+ h->entry->size = size;
|
||||
+
|
||||
+ return &h->entry->data[0];
|
||||
+}
|
@ -0,0 +1,259 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Tue, 12 Feb 2019 15:29:43 +0100
|
||||
Subject: [PATCH 05/25] printk-rb: add basic non-blocking reading interface
|
||||
|
||||
Add reader iterator static declaration/initializer, dynamic
|
||||
initializer, and functions to iterate and retrieve ring buffer data.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/printk_ringbuffer.h | 20 ++++
|
||||
lib/printk_ringbuffer.c | 190 ++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 210 insertions(+)
|
||||
|
||||
--- a/include/linux/printk_ringbuffer.h
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -43,6 +43,19 @@ static struct prb_cpulock name = { \
|
||||
.irqflags = &_##name##_percpu_irqflags, \
|
||||
}
|
||||
|
||||
+#define PRB_INIT ((unsigned long)-1)
|
||||
+
|
||||
+#define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr) \
|
||||
+static struct prb_iterator name = { \
|
||||
+ .rb = rbaddr, \
|
||||
+ .lpos = PRB_INIT, \
|
||||
+}
|
||||
+
|
||||
+struct prb_iterator {
|
||||
+ struct printk_ringbuffer *rb;
|
||||
+ unsigned long lpos;
|
||||
+};
|
||||
+
|
||||
#define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \
|
||||
static char _##name##_buffer[1 << (szbits)] \
|
||||
__aligned(__alignof__(long)); \
|
||||
@@ -62,6 +75,13 @@ char *prb_reserve(struct prb_handle *h,
|
||||
unsigned int size);
|
||||
void prb_commit(struct prb_handle *h);
|
||||
|
||||
+/* reader interface */
|
||||
+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
|
||||
+ u64 *seq);
|
||||
+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
|
||||
+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
+
|
||||
/* utility functions */
|
||||
void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
||||
--- a/lib/printk_ringbuffer.c
|
||||
+++ b/lib/printk_ringbuffer.c
|
||||
@@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/smp.h>
|
||||
+#include <linux/string.h>
|
||||
+#include <linux/errno.h>
|
||||
#include <linux/printk_ringbuffer.h>
|
||||
|
||||
#define PRB_SIZE(rb) (1 << rb->size_bits)
|
||||
@@ -8,6 +10,7 @@
|
||||
#define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits)
|
||||
#define PRB_WRAP_LPOS(rb, lpos, xtra) \
|
||||
((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits)
|
||||
+#define PRB_DATA_SIZE(e) (e->size - sizeof(struct prb_entry))
|
||||
#define PRB_DATA_ALIGN sizeof(long)
|
||||
|
||||
static bool __prb_trylock(struct prb_cpulock *cpu_lock,
|
||||
@@ -247,3 +250,190 @@ char *prb_reserve(struct prb_handle *h,
|
||||
|
||||
return &h->entry->data[0];
|
||||
}
|
||||
+
|
||||
+/*
|
||||
+ * prb_iter_copy: Copy an iterator.
|
||||
+ * @dest: The iterator to copy to.
|
||||
+ * @src: The iterator to copy from.
|
||||
+ *
|
||||
+ * Make a deep copy of an iterator. This is particularly useful for making
|
||||
+ * backup copies of an iterator in case a form of rewinding is needed.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state. But
|
||||
+ * note that this function is not atomic. Callers should not make copies
|
||||
+ * to/from iterators that can be accessed by other tasks/contexts.
|
||||
+ */
|
||||
+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src)
|
||||
+{
|
||||
+ memcpy(dest, src, sizeof(*dest));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_iter_init: Initialize an iterator for a ring buffer.
|
||||
+ * @iter: The iterator to initialize.
|
||||
+ * @rb: The ring buffer that @iter should iterate.
|
||||
+ * @seq: The sequence number of the position preceding the first record.
|
||||
+ * May be NULL.
|
||||
+ *
|
||||
+ * Initialize an iterator to be used with a specified ring buffer. If @seq
|
||||
+ * is non-NULL, it will be set such that prb_iter_next() will provide a
|
||||
+ * sequence value of "@seq + 1" if no records were missed.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
+ u64 *seq)
+{
+ memset(iter, 0, sizeof(*iter));
+ iter->rb = rb;
+ iter->lpos = PRB_INIT;
+
+ if (!seq)
+ return;
+
+ for (;;) {
+ struct prb_iterator tmp_iter;
+ int ret;
+
+ prb_iter_copy(&tmp_iter, iter);
+
+ ret = prb_iter_next(&tmp_iter, NULL, 0, seq);
+ if (ret < 0)
+ continue;
+
+ if (ret == 0)
+ *seq = 0;
+ else
+ (*seq)--;
+ break;
+ }
+}
+
+static bool is_valid(struct printk_ringbuffer *rb, unsigned long lpos)
+{
+ unsigned long head, tail;
+
+ tail = atomic_long_read(&rb->tail);
+ head = atomic_long_read(&rb->head);
+ head -= tail;
+ lpos -= tail;
+
+ if (lpos >= head)
+ return false;
+ return true;
+}
+
+/*
+ * prb_iter_data: Retrieve the record data at the current position.
+ * @iter: Iterator tracking the current position.
+ * @buf: A buffer to store the data of the record. May be NULL.
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
+ * @seq: The sequence number of the record. May be NULL.
+ *
+ * If @iter is at a record, provide the data and/or sequence number of that
+ * record (if specified by the caller).
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns >=0 if the current record contains valid data (returns 0 if @buf
+ * is NULL or returns the size of the data block if @buf is non-NULL) or
+ * -EINVAL if @iter is now invalid.
+ */
+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq)
+{
+ struct printk_ringbuffer *rb = iter->rb;
+ unsigned long lpos = iter->lpos;
+ unsigned int datsize = 0;
+ struct prb_entry *e;
+
+ if (buf || seq) {
+ e = to_entry(rb, lpos);
+ if (!is_valid(rb, lpos))
+ return -EINVAL;
+ /* memory barrier to ensure valid lpos */
+ smp_rmb();
+ if (buf) {
+ datsize = PRB_DATA_SIZE(e);
+ /* memory barrier to ensure load of datsize */
+ smp_rmb();
+ if (!is_valid(rb, lpos))
+ return -EINVAL;
+ if (PRB_INDEX(rb, lpos) + datsize >
+ PRB_SIZE(rb) - PRB_DATA_ALIGN) {
+ return -EINVAL;
+ }
+ if (size > datsize)
+ size = datsize;
+ memcpy(buf, &e->data[0], size);
+ }
+ if (seq)
+ *seq = e->seq;
+ /* memory barrier to ensure loads of entry data */
+ smp_rmb();
+ }
+
+ if (!is_valid(rb, lpos))
+ return -EINVAL;
+
+ return datsize;
+}
+
+/*
+ * prb_iter_next: Advance to the next record.
+ * @iter: Iterator tracking the current position.
+ * @buf: A buffer to store the data of the next record. May be NULL.
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
+ * @seq: The sequence number of the next record. May be NULL.
+ *
+ * If a next record is available, @iter is advanced and (if specified)
+ * the data and/or sequence number of that record are provided.
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns 1 if @iter was advanced, 0 if @iter is at the end of the list, or
+ * -EINVAL if @iter is now invalid.
+ */
+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
+{
+ struct printk_ringbuffer *rb = iter->rb;
+ unsigned long next_lpos;
+ struct prb_entry *e;
+ unsigned int esize;
+
+ if (iter->lpos == PRB_INIT) {
+ next_lpos = atomic_long_read(&rb->tail);
+ } else {
+ if (!is_valid(rb, iter->lpos))
+ return -EINVAL;
+ /* memory barrier to ensure valid lpos */
+ smp_rmb();
+ e = to_entry(rb, iter->lpos);
+ esize = e->size;
+ /* memory barrier to ensure load of size */
+ smp_rmb();
+ if (!is_valid(rb, iter->lpos))
+ return -EINVAL;
+ next_lpos = iter->lpos + esize;
+ }
+ if (next_lpos == atomic_long_read(&rb->head))
+ return 0;
+ if (!is_valid(rb, next_lpos))
+ return -EINVAL;
+ /* memory barrier to ensure valid lpos */
+ smp_rmb();
+
+ iter->lpos = next_lpos;
+ e = to_entry(rb, iter->lpos);
+ esize = e->size;
+ /* memory barrier to ensure load of size */
+ smp_rmb();
+ if (!is_valid(rb, iter->lpos))
+ return -EINVAL;
+ if (esize == -1)
+ iter->lpos = PRB_WRAP_LPOS(rb, iter->lpos, 1);
+
+ if (prb_iter_data(iter, buf, size, seq) < 0)
+ return -EINVAL;
+
+ return 1;
+}
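
A minimal sketch of how the iterator calls above fit together, assuming a caller
that simply walks every record with a fixed 256-byte copy buffer (illustration
only, not part of the patch):

/* Sketch: walk every record currently in a ring buffer. */
static void walk_records(struct printk_ringbuffer *rb)
{
	struct prb_iterator iter;
	char buf[256];		/* arbitrary copy-buffer size (assumption) */
	u64 seq;
	int ret;

	prb_iter_init(&iter, rb, NULL);	/* position before the oldest record */
	for (;;) {
		ret = prb_iter_next(&iter, buf, sizeof(buf), &seq);
		if (ret == 0)		/* no further records */
			break;
		if (ret < 0) {		/* writers overtook the iterator */
			prb_iter_init(&iter, rb, NULL);
			continue;
		}
		/* buf now holds a copy of record @seq */
	}
}
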
@ -0,0 +1,161 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:44 +0100
Subject: [PATCH 06/25] printk-rb: add blocking reader support

Add a blocking read function for readers. An irq_work function is
used to signal the wait queue so that write notification can
be triggered from any context.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/printk_ringbuffer.h | 20 +++++++++++++
lib/printk_ringbuffer.c | 55 ++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+)

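As a rough illustration of the blocking interface added here, a reader loop
might look like the following sketch (not taken from the patch; retrying on
-ERESTARTSYS is just one reasonable policy):

/* Sketch: block until a writer commits a new record, then consume it. */
static int read_next_blocking(struct printk_ringbuffer *rb, char *buf, int size)
{
	struct prb_iterator iter;
	u64 seq;
	int ret;

	prb_iter_init(&iter, rb, NULL);
	for (;;) {
		ret = prb_iter_wait_next(&iter, buf, size, &seq);
		if (ret == -ERESTARTSYS)	/* interrupted by a signal */
			continue;
		if (ret < 0) {			/* iterator invalidated */
			prb_iter_init(&iter, rb, NULL);
			continue;
		}
		return ret;			/* 1: buf holds the new record */
	}
}
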
--- a/include/linux/printk_ringbuffer.h
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -2,8 +2,10 @@
|
||||
#ifndef _LINUX_PRINTK_RINGBUFFER_H
|
||||
#define _LINUX_PRINTK_RINGBUFFER_H
|
||||
|
||||
+#include <linux/irq_work.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/percpu.h>
|
||||
+#include <linux/wait.h>
|
||||
|
||||
struct prb_cpulock {
|
||||
atomic_t owner;
|
||||
@@ -22,6 +24,10 @@ struct printk_ringbuffer {
|
||||
|
||||
struct prb_cpulock *cpulock;
|
||||
atomic_t ctx;
|
||||
+
|
||||
+ struct wait_queue_head *wq;
|
||||
+ atomic_long_t wq_counter;
|
||||
+ struct irq_work *wq_work;
|
||||
};
|
||||
|
||||
struct prb_entry {
|
||||
@@ -59,6 +65,15 @@ struct prb_iterator {
|
||||
#define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr) \
|
||||
static char _##name##_buffer[1 << (szbits)] \
|
||||
__aligned(__alignof__(long)); \
|
||||
+static DECLARE_WAIT_QUEUE_HEAD(_##name##_wait); \
|
||||
+static void _##name##_wake_work_func(struct irq_work *irq_work) \
|
||||
+{ \
|
||||
+ wake_up_interruptible_all(&_##name##_wait); \
|
||||
+} \
|
||||
+static struct irq_work _##name##_wake_work = { \
|
||||
+ .func = _##name##_wake_work_func, \
|
||||
+ .flags = IRQ_WORK_LAZY, \
|
||||
+}; \
|
||||
static struct printk_ringbuffer name = { \
|
||||
.buffer = &_##name##_buffer[0], \
|
||||
.size_bits = szbits, \
|
||||
@@ -68,6 +83,9 @@ static struct printk_ringbuffer name = {
|
||||
.reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
.cpulock = cpulockptr, \
|
||||
.ctx = ATOMIC_INIT(0), \
|
||||
+ .wq = &_##name##_wait, \
|
||||
+ .wq_counter = ATOMIC_LONG_INIT(0), \
|
||||
+ .wq_work = &_##name##_wake_work, \
|
||||
}
|
||||
|
||||
/* writer interface */
|
||||
@@ -80,6 +98,8 @@ void prb_iter_init(struct prb_iterator *
|
||||
u64 *seq);
|
||||
void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
|
||||
int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size,
|
||||
+ u64 *seq);
|
||||
int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
|
||||
/* utility functions */
|
||||
--- a/lib/printk_ringbuffer.c
|
||||
+++ b/lib/printk_ringbuffer.c
|
||||
@@ -1,4 +1,5 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
+#include <linux/sched.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/errno.h>
|
||||
@@ -154,6 +155,7 @@ static bool push_tail(struct printk_ring
|
||||
void prb_commit(struct prb_handle *h)
|
||||
{
|
||||
struct printk_ringbuffer *rb = h->rb;
|
||||
+ bool changed = false;
|
||||
struct prb_entry *e;
|
||||
unsigned long head;
|
||||
unsigned long res;
|
||||
@@ -175,6 +177,7 @@ void prb_commit(struct prb_handle *h)
|
||||
}
|
||||
e->seq = ++rb->seq;
|
||||
head += e->size;
|
||||
+ changed = true;
|
||||
}
|
||||
atomic_long_set_release(&rb->head, res);
|
||||
atomic_dec(&rb->ctx);
|
||||
@@ -185,6 +188,18 @@ void prb_commit(struct prb_handle *h)
|
||||
}
|
||||
|
||||
prb_unlock(rb->cpulock, h->cpu);
|
||||
+
|
||||
+ if (changed) {
|
||||
+ atomic_long_inc(&rb->wq_counter);
|
||||
+ if (wq_has_sleeper(rb->wq)) {
|
||||
+#ifdef CONFIG_IRQ_WORK
|
||||
+ irq_work_queue(rb->wq_work);
|
||||
+#else
|
||||
+ if (!in_nmi())
|
||||
+ wake_up_interruptible_all(rb->wq);
|
||||
+#endif
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -437,3 +452,43 @@ int prb_iter_next(struct prb_iterator *i
|
||||
|
||||
return 1;
|
||||
}
|
||||
+
|
||||
+/*
|
||||
+ * prb_iter_wait_next: Advance to the next record, blocking if none available.
|
||||
+ * @iter: Iterator tracking the current position.
|
||||
+ * @buf: A buffer to store the data of the next record. May be NULL.
|
||||
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
|
||||
+ * @seq: The sequence number of the next record. May be NULL.
|
||||
+ *
|
||||
+ * If a next record is already available, this function works like
|
||||
+ * prb_iter_next(). Otherwise block interruptible until a next record is
|
||||
+ * available.
|
||||
+ *
|
||||
+ * When a next record is available, @iter is advanced and (if specified)
|
||||
+ * the data and/or sequence number of that record are provided.
|
||||
+ *
|
||||
+ * This function might sleep.
|
||||
+ *
|
||||
+ * Returns 1 if @iter was advanced, -EINVAL if @iter is now invalid, or
|
||||
+ * -ERESTARTSYS if interrupted by a signal.
|
||||
+ */
|
||||
+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
|
||||
+{
|
||||
+ unsigned long last_seen;
|
||||
+ int ret;
|
||||
+
|
||||
+ for (;;) {
|
||||
+ last_seen = atomic_long_read(&iter->rb->wq_counter);
|
||||
+
|
||||
+ ret = prb_iter_next(iter, buf, size, seq);
|
||||
+ if (ret != 0)
|
||||
+ break;
|
||||
+
|
||||
+ ret = wait_event_interruptible(*iter->rb->wq,
|
||||
+ last_seen != atomic_long_read(&iter->rb->wq_counter));
|
||||
+ if (ret < 0)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
@ -0,0 +1,159 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:45 +0100
Subject: [PATCH 07/25] printk-rb: add functionality required by printk

The printk subsystem needs to be able to query the size of the ring
buffer, seek to specific entries within the ring buffer, and track
if records could not be stored in the ring buffer.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/printk_ringbuffer.h | 5 ++
lib/printk_ringbuffer.c | 95 ++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+)

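A sketch of how a consumer could combine the new seek and size helpers
(hypothetical caller, not from the patch; last_seq is assumed to be a sequence
number remembered from an earlier read):

/* Sketch: resume reading at a remembered sequence number. */
static void resume_reader(struct printk_ringbuffer *rb, u64 last_seq)
{
	struct prb_iterator iter;

	prb_iter_init(&iter, rb, NULL);
	if (prb_iter_seek(&iter, last_seq) < 0) {
		/* the wanted record was already overwritten or skipped:
		 * restart from the oldest available record */
		prb_iter_init(&iter, rb, NULL);
	}
	/* prb_buffer_size(rb) is an upper bound for any copy buffer */
}
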
--- a/include/linux/printk_ringbuffer.h
|
||||
+++ b/include/linux/printk_ringbuffer.h
|
||||
@@ -17,6 +17,7 @@ struct printk_ringbuffer {
|
||||
unsigned int size_bits;
|
||||
|
||||
u64 seq;
|
||||
+ atomic_long_t lost;
|
||||
|
||||
atomic_long_t tail;
|
||||
atomic_long_t head;
|
||||
@@ -78,6 +79,7 @@ static struct printk_ringbuffer name = {
|
||||
.buffer = &_##name##_buffer[0], \
|
||||
.size_bits = szbits, \
|
||||
.seq = 0, \
|
||||
+ .lost = ATOMIC_LONG_INIT(0), \
|
||||
.tail = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
.head = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
.reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)), \
|
||||
@@ -100,9 +102,12 @@ void prb_iter_copy(struct prb_iterator *
|
||||
int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size,
|
||||
u64 *seq);
|
||||
+int prb_iter_seek(struct prb_iterator *iter, u64 seq);
|
||||
int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
|
||||
|
||||
/* utility functions */
|
||||
+int prb_buffer_size(struct printk_ringbuffer *rb);
|
||||
+void prb_inc_lost(struct printk_ringbuffer *rb);
|
||||
void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
|
||||
void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
|
||||
|
||||
--- a/lib/printk_ringbuffer.c
|
||||
+++ b/lib/printk_ringbuffer.c
|
||||
@@ -175,11 +175,16 @@ void prb_commit(struct prb_handle *h)
|
||||
head = PRB_WRAP_LPOS(rb, head, 1);
|
||||
continue;
|
||||
}
|
||||
+ while (atomic_long_read(&rb->lost)) {
|
||||
+ atomic_long_dec(&rb->lost);
|
||||
+ rb->seq++;
|
||||
+ }
|
||||
e->seq = ++rb->seq;
|
||||
head += e->size;
|
||||
changed = true;
|
||||
}
|
||||
atomic_long_set_release(&rb->head, res);
|
||||
+
|
||||
atomic_dec(&rb->ctx);
|
||||
|
||||
if (atomic_long_read(&rb->reserve) == res)
|
||||
@@ -492,3 +497,93 @@ int prb_iter_wait_next(struct prb_iterat
|
||||
|
||||
return ret;
|
||||
}
|
||||
+
|
||||
+/*
|
||||
+ * prb_iter_seek: Seek forward to a specific record.
|
||||
+ * @iter: Iterator to advance.
|
||||
+ * @seq: Record number to advance to.
|
||||
+ *
|
||||
+ * Advance @iter such that a following call to prb_iter_data() will provide
|
||||
+ * the contents of the specified record. If a record is specified that does
|
||||
+ * not yet exist, advance @iter to the end of the record list.
|
||||
+ *
|
||||
+ * Note that iterators cannot be rewound. So if a record is requested that
|
||||
+ * exists but is previous to @iter in position, @iter is considered invalid.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ *
|
||||
+ * Returns 1 on success, 0 if specified record does not yet exist (@iter is
|
||||
+ * now at the end of the list), or -EINVAL if @iter is now invalid.
|
||||
+ */
|
||||
+int prb_iter_seek(struct prb_iterator *iter, u64 seq)
|
||||
+{
|
||||
+ u64 cur_seq;
|
||||
+ int ret;
|
||||
+
|
||||
+ /* first check if the iterator is already at the wanted seq */
|
||||
+ if (seq == 0) {
|
||||
+ if (iter->lpos == PRB_INIT)
|
||||
+ return 1;
|
||||
+ else
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ if (iter->lpos != PRB_INIT) {
|
||||
+ if (prb_iter_data(iter, NULL, 0, &cur_seq) >= 0) {
|
||||
+ if (cur_seq == seq)
|
||||
+ return 1;
|
||||
+ if (cur_seq > seq)
|
||||
+ return -EINVAL;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* iterate to find the wanted seq */
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_next(iter, NULL, 0, &cur_seq);
|
||||
+ if (ret <= 0)
|
||||
+ break;
|
||||
+
|
||||
+ if (cur_seq == seq)
|
||||
+ break;
|
||||
+
|
||||
+ if (cur_seq > seq) {
|
||||
+ ret = -EINVAL;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_buffer_size: Get the size of the ring buffer.
|
||||
+ * @rb: The ring buffer to get the size of.
|
||||
+ *
|
||||
+ * Return the number of bytes used for the ring buffer entry storage area.
|
||||
+ * Note that this area stores both entry header and entry data. Therefore
|
||||
+ * this represents an upper bound to the amount of data that can be stored
|
||||
+ * in the ring buffer.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ *
|
||||
+ * Returns the size in bytes of the entry storage area.
|
||||
+ */
|
||||
+int prb_buffer_size(struct printk_ringbuffer *rb)
|
||||
+{
|
||||
+ return PRB_SIZE(rb);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * prb_inc_lost: Increment the seq counter to signal a lost record.
|
||||
+ * @rb: The ring buffer to increment the seq of.
|
||||
+ *
|
||||
+ * Increment the seq counter so that a seq number is intentionally missing
|
||||
+ * for the readers. This allows readers to identify that a record is
|
||||
+ * missing. A writer will typically use this function if prb_reserve()
|
||||
+ * fails.
|
||||
+ *
|
||||
+ * It is safe to call this function from any context and state.
|
||||
+ */
|
||||
+void prb_inc_lost(struct printk_ringbuffer *rb)
|
||||
+{
|
||||
+ atomic_long_inc(&rb->lost);
|
||||
+}
|
@ -0,0 +1,168 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:46 +0100
Subject: [PATCH 08/25] printk: add ring buffer and kthread

The printk ring buffer provides an NMI-safe interface for writing
messages to a ring buffer. Using such a buffer alleviates printk
callers from the current burdens of disabled preemption while calling
the console drivers (and possibly printing out many messages that
another task put into the log buffer).

Create a ring buffer to be used for storing messages to be
printed to the consoles.

Create a dedicated printk kthread to block on the ring buffer
and call the console drivers for the read messages.

NOTE: The printk_delay is relocated to _after_ the message is
printed, where it makes more sense.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 105 insertions(+)

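The thread setup itself is the usual kthread_run-from-an-initcall idiom; a
generic sketch with hypothetical names, separate from the concrete hunks below:

static int consumer_thread(void *unused)
{
	for (;;) {
		/* block on the ring buffer, then push records to the consoles */
	}
	return 0;
}

static int __init start_consumer(void)
{
	struct task_struct *t = kthread_run(consumer_thread, NULL, "consumer");

	return IS_ERR(t) ? PTR_ERR(t) : 0;
}
late_initcall(start_consumer);
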
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -45,6 +45,8 @@
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/uio.h>
|
||||
+#include <linux/kthread.h>
|
||||
+#include <linux/printk_ringbuffer.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/debug.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
@@ -417,7 +419,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
printk_safe_exit_irqrestore(flags); \
|
||||
} while (0)
|
||||
|
||||
+DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
|
||||
+
|
||||
#ifdef CONFIG_PRINTK
|
||||
+/* record buffer */
|
||||
+DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock);
|
||||
+
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
static u64 syslog_seq;
|
||||
@@ -780,6 +787,10 @@ static ssize_t msg_print_ext_body(char *
|
||||
return p - buf;
|
||||
}
|
||||
|
||||
+#define PRINTK_SPRINT_MAX (LOG_LINE_MAX + PREFIX_MAX)
|
||||
+#define PRINTK_RECORD_MAX (sizeof(struct printk_log) + \
|
||||
+ CONSOLE_EXT_LOG_MAX + PRINTK_SPRINT_MAX)
|
||||
+
|
||||
/* /dev/kmsg - userspace message inject/listen interface */
|
||||
struct devkmsg_user {
|
||||
u64 seq;
|
||||
@@ -1620,6 +1631,34 @@ SYSCALL_DEFINE3(syslog, int, type, char
|
||||
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
|
||||
}
|
||||
|
||||
+static void format_text(struct printk_log *msg, u64 seq,
|
||||
+ char *ext_text, size_t *ext_len,
|
||||
+ char *text, size_t *len, bool time)
|
||||
+{
|
||||
+ if (suppress_message_printing(msg->level)) {
|
||||
+ /*
|
||||
+ * Skip record that has level above the console
|
||||
+ * loglevel and update each console's local seq.
|
||||
+ */
|
||||
+ *len = 0;
|
||||
+ *ext_len = 0;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ *len = msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
+ time, text, PRINTK_SPRINT_MAX);
|
||||
+ if (nr_ext_console_drivers) {
|
||||
+ *ext_len = msg_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX,
|
||||
+ msg, seq);
|
||||
+ *ext_len += msg_print_ext_body(ext_text + *ext_len,
|
||||
+ CONSOLE_EXT_LOG_MAX - *ext_len,
|
||||
+ log_dict(msg), msg->dict_len,
|
||||
+ log_text(msg), msg->text_len);
|
||||
+ } else {
|
||||
+ *ext_len = 0;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Special console_lock variants that help to reduce the risk of soft-lockups.
|
||||
* They allow to pass console_lock to another printk() call using a busy wait.
|
||||
@@ -2974,6 +3013,72 @@ void wake_up_klogd(void)
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
+static int printk_kthread_func(void *data)
|
||||
+{
|
||||
+ struct prb_iterator iter;
|
||||
+ struct printk_log *msg;
|
||||
+ size_t ext_len;
|
||||
+ char *ext_text;
|
||||
+ u64 master_seq;
|
||||
+ size_t len;
|
||||
+ char *text;
|
||||
+ char *buf;
|
||||
+ int ret;
|
||||
+
|
||||
+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
|
||||
+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
|
||||
+ buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
|
||||
+ if (!ext_text || !text || !buf)
|
||||
+ return -1;
|
||||
+
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+
|
||||
+ /* the printk kthread never exits */
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_wait_next(&iter, buf,
|
||||
+ PRINTK_RECORD_MAX, &master_seq);
|
||||
+ if (ret == -ERESTARTSYS) {
|
||||
+ continue;
|
||||
+ } else if (ret < 0) {
|
||||
+ /* iterator invalid, start over */
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ msg = (struct printk_log *)buf;
|
||||
+ format_text(msg, master_seq, ext_text, &ext_len, text,
|
||||
+ &len, printk_time);
|
||||
+
|
||||
+ console_lock();
|
||||
+ if (len > 0 || ext_len > 0) {
|
||||
+ call_console_drivers(ext_text, ext_len, text, len);
|
||||
+ boot_delay_msec(msg->level);
|
||||
+ printk_delay();
|
||||
+ }
|
||||
+ console_unlock();
|
||||
+ }
|
||||
+
|
||||
+ kfree(ext_text);
|
||||
+ kfree(text);
|
||||
+ kfree(buf);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static int __init init_printk_kthread(void)
|
||||
+{
|
||||
+ struct task_struct *thread;
|
||||
+
|
||||
+ thread = kthread_run(printk_kthread_func, NULL, "printk");
|
||||
+ if (IS_ERR(thread)) {
|
||||
+ pr_err("printk: unable to create printing thread\n");
|
||||
+ return PTR_ERR(thread);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+late_initcall(init_printk_kthread);
|
||||
+
|
||||
void defer_console_output(void)
|
||||
{
|
||||
preempt_disable();
|
@ -0,0 +1,101 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:47 +0100
Subject: [PATCH 09/25] printk: remove exclusive console hack

In order to support printing the printk log history when new
consoles are registered, a global exclusive_console variable is
temporarily set. This only works because printk runs with
preemption disabled.

When console printing is moved to a fully preemptible dedicated
kthread, this hack no longer works.

Remove exclusive_console usage.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 30 ++++--------------------------
1 file changed, 4 insertions(+), 26 deletions(-)

--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -269,11 +269,6 @@ static void __up_console_sem(unsigned lo
|
||||
static int console_locked, console_suspended;
|
||||
|
||||
/*
|
||||
- * If exclusive_console is non-NULL then only this console is to be printed to.
|
||||
- */
|
||||
-static struct console *exclusive_console;
|
||||
-
|
||||
-/*
|
||||
* Array of consoles built from command line options (console=)
|
||||
*/
|
||||
|
||||
@@ -443,7 +438,6 @@ static u32 log_next_idx;
|
||||
/* the next printk record to write to the console */
|
||||
static u64 console_seq;
|
||||
static u32 console_idx;
|
||||
-static u64 exclusive_console_stop_seq;
|
||||
|
||||
/* the next printk record to read after the last 'clear' command */
|
||||
static u64 clear_seq;
|
||||
@@ -1815,8 +1809,6 @@ static void call_console_drivers(const c
|
||||
return;
|
||||
|
||||
for_each_console(con) {
|
||||
- if (exclusive_console && con != exclusive_console)
|
||||
- continue;
|
||||
if (!(con->flags & CON_ENABLED))
|
||||
continue;
|
||||
if (!con->write)
|
||||
@@ -2109,7 +2101,6 @@ static u64 syslog_seq;
|
||||
static u32 syslog_idx;
|
||||
static u64 console_seq;
|
||||
static u32 console_idx;
|
||||
-static u64 exclusive_console_stop_seq;
|
||||
static u64 log_first_seq;
|
||||
static u32 log_first_idx;
|
||||
static u64 log_next_seq;
|
||||
@@ -2478,12 +2469,6 @@ void console_unlock(void)
|
||||
goto skip;
|
||||
}
|
||||
|
||||
- /* Output to all consoles once old messages replayed. */
|
||||
- if (unlikely(exclusive_console &&
|
||||
- console_seq >= exclusive_console_stop_seq)) {
|
||||
- exclusive_console = NULL;
|
||||
- }
|
||||
-
|
||||
len += msg_print_text(msg,
|
||||
console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
printk_time, text + len, sizeof(text) - len);
|
||||
@@ -2809,17 +2794,6 @@ void register_console(struct console *ne
|
||||
* for us.
|
||||
*/
|
||||
logbuf_lock_irqsave(flags);
|
||||
- /*
|
||||
- * We're about to replay the log buffer. Only do this to the
|
||||
- * just-registered console to avoid excessive message spam to
|
||||
- * the already-registered consoles.
|
||||
- *
|
||||
- * Set exclusive_console with disabled interrupts to reduce
|
||||
- * race window with eventual console_flush_on_panic() that
|
||||
- * ignores console_lock.
|
||||
- */
|
||||
- exclusive_console = newcon;
|
||||
- exclusive_console_stop_seq = console_seq;
|
||||
console_seq = syslog_seq;
|
||||
console_idx = syslog_idx;
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
@@ -2833,6 +2807,10 @@ void register_console(struct console *ne
|
||||
* boot consoles, real consoles, etc - this is to ensure that end
|
||||
* users know there might be something in the kernel's log buffer that
|
||||
* went to the bootconsole (that they do not see on the real console)
|
||||
+ *
|
||||
+ * This message is also important because it will trigger the
|
||||
+ * printk kthread to begin dumping the log buffer to the newly
|
||||
+ * registered console.
|
||||
*/
|
||||
pr_info("%sconsole [%s%d] enabled\n",
|
||||
(newcon->flags & CON_BOOT) ? "boot" : "" ,
|
@ -0,0 +1,437 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:48 +0100
Subject: [PATCH 10/25] printk: redirect emit/store to new ringbuffer

vprintk_emit and vprintk_store are the main functions that all printk
variants eventually go through. Change these to store the message in
the new printk ring buffer that the printk kthread is reading.

Remove functions no longer in use because of the changes to
vprintk_emit and vprintk_store.

In order to handle interrupts and NMIs, a second per-cpu ring buffer
(sprint_rb) is added. This ring buffer is used for NMI-safe memory
allocation in order to format the printk messages.

NOTE: LOG_CONT is ignored for now and handled as individual messages.
LOG_CONT functions are masked behind "#if 0" blocks until their
functionality can be restored.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 325 +++++++------------------------------------------
1 file changed, 51 insertions(+), 274 deletions(-)

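The two-stage store described above can be summarised in a sketch (simplified
and hypothetical: no dict, caller id or log flag handling; sprint_rb, printk_rb,
PRINTK_SPRINT_MAX and log_store are the names introduced by the hunks below):

/* Sketch of the two-stage store: format in scratch space, then log it. */
static int store_formatted(const char *fmt, va_list args)
{
	struct prb_handle h;
	char *scratch;
	int len;

	/* 1) grab NMI-safe scratch memory from the sprint ring buffer */
	scratch = prb_reserve(&h, &sprint_rb, PRINTK_SPRINT_MAX);
	if (!scratch) {
		prb_inc_lost(&printk_rb);	/* count the dropped message */
		return 0;
	}

	/* 2) format the message into the scratch buffer */
	len = vscnprintf(scratch, PRINTK_SPRINT_MAX, fmt, args);

	/* 3) copy it into the real record buffer; log_store() reserves
	 *    from and commits to printk_rb internally */
	len = log_store(0, 0, LOGLEVEL_DEFAULT, 0, local_clock(),
			NULL, 0, scratch, len);

	/* 4) release the scratch reservation */
	prb_commit(&h);
	return len;
}
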
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -517,90 +517,6 @@ static u32 log_next(u32 idx)
|
||||
return idx + msg->len;
|
||||
}
|
||||
|
||||
-/*
|
||||
- * Check whether there is enough free space for the given message.
|
||||
- *
|
||||
- * The same values of first_idx and next_idx mean that the buffer
|
||||
- * is either empty or full.
|
||||
- *
|
||||
- * If the buffer is empty, we must respect the position of the indexes.
|
||||
- * They cannot be reset to the beginning of the buffer.
|
||||
- */
|
||||
-static int logbuf_has_space(u32 msg_size, bool empty)
|
||||
-{
|
||||
- u32 free;
|
||||
-
|
||||
- if (log_next_idx > log_first_idx || empty)
|
||||
- free = max(log_buf_len - log_next_idx, log_first_idx);
|
||||
- else
|
||||
- free = log_first_idx - log_next_idx;
|
||||
-
|
||||
- /*
|
||||
- * We need space also for an empty header that signalizes wrapping
|
||||
- * of the buffer.
|
||||
- */
|
||||
- return free >= msg_size + sizeof(struct printk_log);
|
||||
-}
|
||||
-
|
||||
-static int log_make_free_space(u32 msg_size)
|
||||
-{
|
||||
- while (log_first_seq < log_next_seq &&
|
||||
- !logbuf_has_space(msg_size, false)) {
|
||||
- /* drop old messages until we have enough contiguous space */
|
||||
- log_first_idx = log_next(log_first_idx);
|
||||
- log_first_seq++;
|
||||
- }
|
||||
-
|
||||
- if (clear_seq < log_first_seq) {
|
||||
- clear_seq = log_first_seq;
|
||||
- clear_idx = log_first_idx;
|
||||
- }
|
||||
-
|
||||
- /* sequence numbers are equal, so the log buffer is empty */
|
||||
- if (logbuf_has_space(msg_size, log_first_seq == log_next_seq))
|
||||
- return 0;
|
||||
-
|
||||
- return -ENOMEM;
|
||||
-}
|
||||
-
|
||||
-/* compute the message size including the padding bytes */
|
||||
-static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len)
|
||||
-{
|
||||
- u32 size;
|
||||
-
|
||||
- size = sizeof(struct printk_log) + text_len + dict_len;
|
||||
- *pad_len = (-size) & (LOG_ALIGN - 1);
|
||||
- size += *pad_len;
|
||||
-
|
||||
- return size;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Define how much of the log buffer we could take at maximum. The value
|
||||
- * must be greater than two. Note that only half of the buffer is available
|
||||
- * when the index points to the middle.
|
||||
- */
|
||||
-#define MAX_LOG_TAKE_PART 4
|
||||
-static const char trunc_msg[] = "<truncated>";
|
||||
-
|
||||
-static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
|
||||
- u16 *dict_len, u32 *pad_len)
|
||||
-{
|
||||
- /*
|
||||
- * The message should not take the whole buffer. Otherwise, it might
|
||||
- * get removed too soon.
|
||||
- */
|
||||
- u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
|
||||
- if (*text_len > max_text_len)
|
||||
- *text_len = max_text_len;
|
||||
- /* enable the warning message */
|
||||
- *trunc_msg_len = strlen(trunc_msg);
|
||||
- /* disable the "dict" completely */
|
||||
- *dict_len = 0;
|
||||
- /* compute the size again, count also the warning message */
|
||||
- return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
|
||||
-}
|
||||
-
|
||||
/* insert record into the buffer, discard old ones, update heads */
|
||||
static int log_store(u32 caller_id, int facility, int level,
|
||||
enum log_flags flags, u64 ts_nsec,
|
||||
@@ -608,57 +524,39 @@ static int log_store(u32 caller_id, int
|
||||
const char *text, u16 text_len)
|
||||
{
|
||||
struct printk_log *msg;
|
||||
- u32 size, pad_len;
|
||||
- u16 trunc_msg_len = 0;
|
||||
-
|
||||
- /* number of '\0' padding bytes to next message */
|
||||
- size = msg_used_size(text_len, dict_len, &pad_len);
|
||||
+ struct prb_handle h;
|
||||
+ char *rbuf;
|
||||
+ u32 size;
|
||||
|
||||
- if (log_make_free_space(size)) {
|
||||
- /* truncate the message if it is too long for empty buffer */
|
||||
- size = truncate_msg(&text_len, &trunc_msg_len,
|
||||
- &dict_len, &pad_len);
|
||||
- /* survive when the log buffer is too small for trunc_msg */
|
||||
- if (log_make_free_space(size))
|
||||
- return 0;
|
||||
- }
|
||||
+ size = sizeof(*msg) + text_len + dict_len;
|
||||
|
||||
- if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
|
||||
+ rbuf = prb_reserve(&h, &printk_rb, size);
|
||||
+ if (!rbuf) {
|
||||
/*
|
||||
- * This message + an additional empty header does not fit
|
||||
- * at the end of the buffer. Add an empty header with len == 0
|
||||
- * to signify a wrap around.
|
||||
+ * An emergency message would have been printed, but
|
||||
+ * it cannot be stored in the log.
|
||||
*/
|
||||
- memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
|
||||
- log_next_idx = 0;
|
||||
+ prb_inc_lost(&printk_rb);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
/* fill message */
|
||||
- msg = (struct printk_log *)(log_buf + log_next_idx);
|
||||
+ msg = (struct printk_log *)rbuf;
|
||||
memcpy(log_text(msg), text, text_len);
|
||||
msg->text_len = text_len;
|
||||
- if (trunc_msg_len) {
|
||||
- memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
|
||||
- msg->text_len += trunc_msg_len;
|
||||
- }
|
||||
memcpy(log_dict(msg), dict, dict_len);
|
||||
msg->dict_len = dict_len;
|
||||
msg->facility = facility;
|
||||
msg->level = level & 7;
|
||||
msg->flags = flags & 0x1f;
|
||||
- if (ts_nsec > 0)
|
||||
- msg->ts_nsec = ts_nsec;
|
||||
- else
|
||||
- msg->ts_nsec = local_clock();
|
||||
+ msg->ts_nsec = ts_nsec;
|
||||
#ifdef CONFIG_PRINTK_CALLER
|
||||
msg->caller_id = caller_id;
|
||||
#endif
|
||||
- memset(log_dict(msg) + dict_len, 0, pad_len);
|
||||
msg->len = size;
|
||||
|
||||
/* insert message */
|
||||
- log_next_idx += msg->len;
|
||||
- log_next_seq++;
|
||||
+ prb_commit(&h);
|
||||
|
||||
return msg->text_len;
|
||||
}
|
||||
@@ -1729,70 +1627,6 @@ static int console_lock_spinning_disable
|
||||
return 1;
|
||||
}
|
||||
|
||||
-/**
|
||||
- * console_trylock_spinning - try to get console_lock by busy waiting
|
||||
- *
|
||||
- * This allows to busy wait for the console_lock when the current
|
||||
- * owner is running in specially marked sections. It means that
|
||||
- * the current owner is running and cannot reschedule until it
|
||||
- * is ready to lose the lock.
|
||||
- *
|
||||
- * Return: 1 if we got the lock, 0 othrewise
|
||||
- */
|
||||
-static int console_trylock_spinning(void)
|
||||
-{
|
||||
- struct task_struct *owner = NULL;
|
||||
- bool waiter;
|
||||
- bool spin = false;
|
||||
- unsigned long flags;
|
||||
-
|
||||
- if (console_trylock())
|
||||
- return 1;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
-
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- owner = READ_ONCE(console_owner);
|
||||
- waiter = READ_ONCE(console_waiter);
|
||||
- if (!waiter && owner && owner != current) {
|
||||
- WRITE_ONCE(console_waiter, true);
|
||||
- spin = true;
|
||||
- }
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- /*
|
||||
- * If there is an active printk() writing to the
|
||||
- * consoles, instead of having it write our data too,
|
||||
- * see if we can offload that load from the active
|
||||
- * printer, and do some printing ourselves.
|
||||
- * Go into a spin only if there isn't already a waiter
|
||||
- * spinning, and there is an active printer, and
|
||||
- * that active printer isn't us (recursive printk?).
|
||||
- */
|
||||
- if (!spin) {
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* We spin waiting for the owner to release us */
|
||||
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
|
||||
- /* Owner will clear console_waiter on hand off */
|
||||
- while (READ_ONCE(console_waiter))
|
||||
- cpu_relax();
|
||||
- spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
-
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- /*
|
||||
- * The owner passed the console lock to us.
|
||||
- * Since we did not spin on console lock, annotate
|
||||
- * this as a trylock. Otherwise lockdep will
|
||||
- * complain.
|
||||
- */
|
||||
- mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
|
||||
-
|
||||
- return 1;
|
||||
-}
|
||||
-
|
||||
/*
|
||||
* Call the console drivers, asking them to write out
|
||||
* log_buf[start] to log_buf[end - 1].
|
||||
@@ -1813,7 +1647,7 @@ static void call_console_drivers(const c
|
||||
continue;
|
||||
if (!con->write)
|
||||
continue;
|
||||
- if (!cpu_online(smp_processor_id()) &&
|
||||
+ if (!cpu_online(raw_smp_processor_id()) &&
|
||||
!(con->flags & CON_ANYTIME))
|
||||
continue;
|
||||
if (con->flags & CON_EXTENDED)
|
||||
@@ -1843,6 +1677,8 @@ static inline u32 printk_caller_id(void)
|
||||
0x80000000 + raw_smp_processor_id();
|
||||
}
|
||||
|
||||
+/* FIXME: no support for LOG_CONT */
|
||||
+#if 0
|
||||
/*
|
||||
* Continuation lines are buffered, and not committed to the record buffer
|
||||
* until the line is complete, or a race forces it. The line fragments
|
||||
@@ -1898,56 +1734,45 @@ static bool cont_add(u32 caller_id, int
|
||||
|
||||
return true;
|
||||
}
|
||||
+#endif /* 0 */
|
||||
|
||||
-static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
|
||||
-{
|
||||
- const u32 caller_id = printk_caller_id();
|
||||
-
|
||||
- /*
|
||||
- * If an earlier line was buffered, and we're a continuation
|
||||
- * write from the same context, try to add it to the buffer.
|
||||
- */
|
||||
- if (cont.len) {
|
||||
- if (cont.caller_id == caller_id && (lflags & LOG_CONT)) {
|
||||
- if (cont_add(caller_id, facility, level, lflags, text, text_len))
|
||||
- return text_len;
|
||||
- }
|
||||
- /* Otherwise, make sure it's flushed */
|
||||
- cont_flush();
|
||||
- }
|
||||
-
|
||||
- /* Skip empty continuation lines that couldn't be added - they just flush */
|
||||
- if (!text_len && (lflags & LOG_CONT))
|
||||
- return 0;
|
||||
-
|
||||
- /* If it doesn't end in a newline, try to buffer the current line */
|
||||
- if (!(lflags & LOG_NEWLINE)) {
|
||||
- if (cont_add(caller_id, facility, level, lflags, text, text_len))
|
||||
- return text_len;
|
||||
- }
|
||||
-
|
||||
- /* Store it in the record log */
|
||||
- return log_store(caller_id, facility, level, lflags, 0,
|
||||
- dict, dictlen, text, text_len);
|
||||
-}
|
||||
-
|
||||
-/* Must be called under logbuf_lock. */
|
||||
int vprintk_store(int facility, int level,
|
||||
const char *dict, size_t dictlen,
|
||||
const char *fmt, va_list args)
|
||||
{
|
||||
- static char textbuf[LOG_LINE_MAX];
|
||||
- char *text = textbuf;
|
||||
- size_t text_len;
|
||||
+ return vprintk_emit(facility, level, dict, dictlen, fmt, args);
|
||||
+}
|
||||
+
|
||||
+/* ring buffer used as memory allocator for temporary sprint buffers */
|
||||
+DECLARE_STATIC_PRINTKRB(sprint_rb,
|
||||
+ ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) +
|
||||
+ sizeof(long)) + 2, &printk_cpulock);
|
||||
+
|
||||
+asmlinkage int vprintk_emit(int facility, int level,
|
||||
+ const char *dict, size_t dictlen,
|
||||
+ const char *fmt, va_list args)
|
||||
+{
|
||||
+ const u32 caller_id = printk_caller_id();
|
||||
enum log_flags lflags = 0;
|
||||
+ int printed_len = 0;
|
||||
+ struct prb_handle h;
|
||||
+ size_t text_len;
|
||||
+ u64 ts_nsec;
|
||||
+ char *text;
|
||||
+ char *rbuf;
|
||||
|
||||
- /*
|
||||
- * The printf needs to come first; we need the syslog
|
||||
- * prefix which might be passed-in as a parameter.
|
||||
- */
|
||||
- text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
|
||||
+ ts_nsec = local_clock();
|
||||
+
|
||||
+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_SPRINT_MAX);
|
||||
+ if (!rbuf) {
|
||||
+ prb_inc_lost(&printk_rb);
|
||||
+ return printed_len;
|
||||
+ }
|
||||
+
|
||||
+ text = rbuf;
|
||||
+ text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args);
|
||||
|
||||
- /* mark and strip a trailing newline */
|
||||
+ /* strip and flag a trailing newline */
|
||||
if (text_len && text[text_len-1] == '\n') {
|
||||
text_len--;
|
||||
lflags |= LOG_NEWLINE;
|
||||
@@ -1978,58 +1803,10 @@ int vprintk_store(int facility, int leve
|
||||
if (dict)
|
||||
lflags |= LOG_NEWLINE;
|
||||
|
||||
- return log_output(facility, level, lflags,
|
||||
- dict, dictlen, text, text_len);
|
||||
-}
|
||||
-
|
||||
-asmlinkage int vprintk_emit(int facility, int level,
|
||||
- const char *dict, size_t dictlen,
|
||||
- const char *fmt, va_list args)
|
||||
-{
|
||||
- int printed_len;
|
||||
- bool in_sched = false, pending_output;
|
||||
- unsigned long flags;
|
||||
- u64 curr_log_seq;
|
||||
-
|
||||
- /* Suppress unimportant messages after panic happens */
|
||||
- if (unlikely(suppress_printk))
|
||||
- return 0;
|
||||
-
|
||||
- if (level == LOGLEVEL_SCHED) {
|
||||
- level = LOGLEVEL_DEFAULT;
|
||||
- in_sched = true;
|
||||
- }
|
||||
-
|
||||
- boot_delay_msec(level);
|
||||
- printk_delay();
|
||||
-
|
||||
- /* This stops the holder of console_sem just where we want him */
|
||||
- logbuf_lock_irqsave(flags);
|
||||
- curr_log_seq = log_next_seq;
|
||||
- printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
|
||||
- pending_output = (curr_log_seq != log_next_seq);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
-
|
||||
- /* If called from the scheduler, we can not call up(). */
|
||||
- if (!in_sched && pending_output) {
|
||||
- /*
|
||||
- * Disable preemption to avoid being preempted while holding
|
||||
- * console_sem which would prevent anyone from printing to
|
||||
- * console
|
||||
- */
|
||||
- preempt_disable();
|
||||
- /*
|
||||
- * Try to acquire and then immediately release the console
|
||||
- * semaphore. The release will print out buffers and wake up
|
||||
- * /dev/kmsg and syslog() users.
|
||||
- */
|
||||
- if (console_trylock_spinning())
|
||||
- console_unlock();
|
||||
- preempt_enable();
|
||||
- }
|
||||
+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec,
|
||||
+ dict, dictlen, text, text_len);
|
||||
|
||||
- if (pending_output)
|
||||
- wake_up_klogd();
|
||||
+ prb_commit(&h);
|
||||
return printed_len;
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
@@ -2494,7 +2271,7 @@ void console_unlock(void)
|
||||
console_lock_spinning_enable();
|
||||
|
||||
stop_critical_timings(); /* don't trace print latency */
|
||||
- call_console_drivers(ext_text, ext_len, text, len);
|
||||
+ //call_console_drivers(ext_text, ext_len, text, len);
|
||||
start_critical_timings();
|
||||
|
||||
if (console_lock_spinning_disable_and_check()) {
|
@ -0,0 +1,699 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:49 +0100
Subject: [PATCH 11/25] printk_safe: remove printk safe code

vprintk variants are now NMI-safe so there is no longer a need for
the "safe" calls.

NOTE: This also removes printk flushing functionality.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/powerpc/kernel/traps.c | 1
arch/powerpc/kernel/watchdog.c | 5
include/linux/hardirq.h | 2
include/linux/printk.h | 27 --
init/main.c | 1
kernel/kexec_core.c | 1
kernel/panic.c | 3
kernel/printk/Makefile | 1
kernel/printk/internal.h | 30 --
kernel/printk/printk.c | 13 -
kernel/printk/printk_safe.c | 415 -----------------------------------------
kernel/trace/trace.c | 2
lib/nmi_backtrace.c | 6
13 files changed, 7 insertions(+), 500 deletions(-)
delete mode 100644 kernel/printk/printk_safe.c

--- a/arch/powerpc/kernel/traps.c
|
||||
+++ b/arch/powerpc/kernel/traps.c
|
||||
@@ -171,7 +171,6 @@ extern void panic_flush_kmsg_start(void)
|
||||
|
||||
extern void panic_flush_kmsg_end(void)
|
||||
{
|
||||
- printk_safe_flush_on_panic();
|
||||
kmsg_dump(KMSG_DUMP_PANIC);
|
||||
bust_spinlocks(0);
|
||||
debug_locks_off();
|
||||
--- a/arch/powerpc/kernel/watchdog.c
|
||||
+++ b/arch/powerpc/kernel/watchdog.c
|
||||
@@ -181,11 +181,6 @@ static void watchdog_smp_panic(int cpu,
|
||||
|
||||
wd_smp_unlock(&flags);
|
||||
|
||||
- printk_safe_flush();
|
||||
- /*
|
||||
- * printk_safe_flush() seems to require another print
|
||||
- * before anything actually goes out to console.
|
||||
- */
|
||||
if (sysctl_hardlockup_all_cpu_backtrace)
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
--- a/include/linux/hardirq.h
|
||||
+++ b/include/linux/hardirq.h
|
||||
@@ -68,7 +68,6 @@ extern void irq_exit(void);
|
||||
#define nmi_enter() \
|
||||
do { \
|
||||
arch_nmi_enter(); \
|
||||
- printk_nmi_enter(); \
|
||||
lockdep_off(); \
|
||||
ftrace_nmi_enter(); \
|
||||
BUG_ON(in_nmi()); \
|
||||
@@ -85,7 +84,6 @@ extern void irq_exit(void);
|
||||
preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
|
||||
ftrace_nmi_exit(); \
|
||||
lockdep_on(); \
|
||||
- printk_nmi_exit(); \
|
||||
arch_nmi_exit(); \
|
||||
} while (0)
|
||||
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -146,18 +146,6 @@ static inline __printf(1, 2) __cold
|
||||
void early_printk(const char *s, ...) { }
|
||||
#endif
|
||||
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-extern void printk_nmi_enter(void);
|
||||
-extern void printk_nmi_exit(void);
|
||||
-extern void printk_nmi_direct_enter(void);
|
||||
-extern void printk_nmi_direct_exit(void);
|
||||
-#else
|
||||
-static inline void printk_nmi_enter(void) { }
|
||||
-static inline void printk_nmi_exit(void) { }
|
||||
-static inline void printk_nmi_direct_enter(void) { }
|
||||
-static inline void printk_nmi_direct_exit(void) { }
|
||||
-#endif /* PRINTK_NMI */
|
||||
-
|
||||
#ifdef CONFIG_PRINTK
|
||||
asmlinkage __printf(5, 0)
|
||||
int vprintk_emit(int facility, int level,
|
||||
@@ -202,9 +190,6 @@ void __init setup_log_buf(int early);
|
||||
void dump_stack_print_info(const char *log_lvl);
|
||||
void show_regs_print_info(const char *log_lvl);
|
||||
extern asmlinkage void dump_stack(void) __cold;
|
||||
-extern void printk_safe_init(void);
|
||||
-extern void printk_safe_flush(void);
|
||||
-extern void printk_safe_flush_on_panic(void);
|
||||
#else
|
||||
static inline __printf(1, 0)
|
||||
int vprintk(const char *s, va_list args)
|
||||
@@ -268,18 +253,6 @@ static inline void show_regs_print_info(
|
||||
static inline void dump_stack(void)
|
||||
{
|
||||
}
|
||||
-
|
||||
-static inline void printk_safe_init(void)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
-static inline void printk_safe_flush(void)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
-static inline void printk_safe_flush_on_panic(void)
|
||||
-{
|
||||
-}
|
||||
#endif
|
||||
|
||||
extern int kptr_restrict;
|
||||
--- a/init/main.c
|
||||
+++ b/init/main.c
|
||||
@@ -694,7 +694,6 @@ asmlinkage __visible void __init start_k
|
||||
boot_init_stack_canary();
|
||||
|
||||
time_init();
|
||||
- printk_safe_init();
|
||||
perf_event_init();
|
||||
profile_init();
|
||||
call_function_init();
|
||||
--- a/kernel/kexec_core.c
|
||||
+++ b/kernel/kexec_core.c
|
||||
@@ -972,7 +972,6 @@ void crash_kexec(struct pt_regs *regs)
|
||||
old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
|
||||
if (old_cpu == PANIC_CPU_INVALID) {
|
||||
/* This is the 1st CPU which comes here, so go ahead. */
|
||||
- printk_safe_flush_on_panic();
|
||||
__crash_kexec(regs);
|
||||
|
||||
/*
|
||||
--- a/kernel/panic.c
|
||||
+++ b/kernel/panic.c
|
||||
@@ -237,7 +237,6 @@ void panic(const char *fmt, ...)
|
||||
* Bypass the panic_cpu check and call __crash_kexec directly.
|
||||
*/
|
||||
if (!_crash_kexec_post_notifiers) {
|
||||
- printk_safe_flush_on_panic();
|
||||
__crash_kexec(NULL);
|
||||
|
||||
/*
|
||||
@@ -261,8 +260,6 @@ void panic(const char *fmt, ...)
|
||||
*/
|
||||
atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
|
||||
|
||||
- /* Call flush even twice. It tries harder with a single online CPU */
|
||||
- printk_safe_flush_on_panic();
|
||||
kmsg_dump(KMSG_DUMP_PANIC);
|
||||
|
||||
/*
|
||||
--- a/kernel/printk/Makefile
|
||||
+++ b/kernel/printk/Makefile
|
||||
@@ -1,4 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
obj-y = printk.o
|
||||
-obj-$(CONFIG_PRINTK) += printk_safe.o
|
||||
obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o
|
||||
--- a/kernel/printk/internal.h
|
||||
+++ b/kernel/printk/internal.h
|
||||
@@ -20,32 +20,6 @@ int vprintk_store(int facility, int leve
|
||||
__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
|
||||
__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
|
||||
__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
|
||||
-void __printk_safe_enter(void);
|
||||
-void __printk_safe_exit(void);
|
||||
-
|
||||
-#define printk_safe_enter_irqsave(flags) \
|
||||
- do { \
|
||||
- local_irq_save(flags); \
|
||||
- __printk_safe_enter(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_exit_irqrestore(flags) \
|
||||
- do { \
|
||||
- __printk_safe_exit(); \
|
||||
- local_irq_restore(flags); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_enter_irq() \
|
||||
- do { \
|
||||
- local_irq_disable(); \
|
||||
- __printk_safe_enter(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define printk_safe_exit_irq() \
|
||||
- do { \
|
||||
- __printk_safe_exit(); \
|
||||
- local_irq_enable(); \
|
||||
- } while (0)
|
||||
|
||||
void defer_console_output(void);
|
||||
|
||||
@@ -58,10 +32,10 @@ void defer_console_output(void);
|
||||
* semaphore and some of console functions (console_unlock()/etc.), so
|
||||
* printk-safe must preserve the existing local IRQ guarantees.
|
||||
*/
|
||||
+#endif /* CONFIG_PRINTK */
|
||||
+
|
||||
#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
|
||||
#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
|
||||
|
||||
#define printk_safe_enter_irq() local_irq_disable()
|
||||
#define printk_safe_exit_irq() local_irq_enable()
|
||||
-
|
||||
-#endif /* CONFIG_PRINTK */
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1736,13 +1736,6 @@ static bool cont_add(u32 caller_id, int
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
||||
-int vprintk_store(int facility, int level,
|
||||
- const char *dict, size_t dictlen,
|
||||
- const char *fmt, va_list args)
|
||||
-{
|
||||
- return vprintk_emit(facility, level, dict, dictlen, fmt, args);
|
||||
-}
|
||||
-
|
||||
/* ring buffer used as memory allocator for temporary sprint buffers */
|
||||
DECLARE_STATIC_PRINTKRB(sprint_rb,
|
||||
ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) +
|
||||
@@ -1811,6 +1804,11 @@ asmlinkage int vprintk_emit(int facility
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
|
||||
+__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
|
||||
+{
|
||||
+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
|
||||
+}
|
||||
+
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
{
|
||||
return vprintk_func(fmt, args);
|
||||
@@ -3211,5 +3209,4 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
-
|
||||
#endif
|
||||
--- a/kernel/printk/printk_safe.c
|
||||
+++ /dev/null
|
||||
@@ -1,415 +0,0 @@
|
||||
-// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
-/*
|
||||
- * printk_safe.c - Safe printk for printk-deadlock-prone contexts
|
||||
- */
|
||||
-
|
||||
-#include <linux/preempt.h>
|
||||
-#include <linux/spinlock.h>
|
||||
-#include <linux/debug_locks.h>
|
||||
-#include <linux/smp.h>
|
||||
-#include <linux/cpumask.h>
|
||||
-#include <linux/irq_work.h>
|
||||
-#include <linux/printk.h>
|
||||
-
|
||||
-#include "internal.h"
|
||||
-
|
||||
-/*
|
||||
- * printk() could not take logbuf_lock in NMI context. Instead,
|
||||
- * it uses an alternative implementation that temporary stores
|
||||
- * the strings into a per-CPU buffer. The content of the buffer
|
||||
- * is later flushed into the main ring buffer via IRQ work.
|
||||
- *
|
||||
- * The alternative implementation is chosen transparently
|
||||
- * by examinig current printk() context mask stored in @printk_context
|
||||
- * per-CPU variable.
|
||||
- *
|
||||
- * The implementation allows to flush the strings also from another CPU.
|
||||
- * There are situations when we want to make sure that all buffers
|
||||
- * were handled or when IRQs are blocked.
|
||||
- */
|
||||
-static int printk_safe_irq_ready __read_mostly;
|
||||
-
|
||||
-#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \
|
||||
- sizeof(atomic_t) - \
|
||||
- sizeof(atomic_t) - \
|
||||
- sizeof(struct irq_work))
|
||||
-
|
||||
-struct printk_safe_seq_buf {
|
||||
- atomic_t len; /* length of written data */
|
||||
- atomic_t message_lost;
|
||||
- struct irq_work work; /* IRQ work that flushes the buffer */
|
||||
- unsigned char buffer[SAFE_LOG_BUF_LEN];
|
||||
-};
|
||||
-
|
||||
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
|
||||
-static DEFINE_PER_CPU(int, printk_context);
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
|
||||
-#endif
|
||||
-
|
||||
-/* Get flushed in a more safe context. */
|
||||
-static void queue_flush_work(struct printk_safe_seq_buf *s)
|
||||
-{
|
||||
- if (printk_safe_irq_ready)
|
||||
- irq_work_queue(&s->work);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe
|
||||
- * have dedicated buffers, because otherwise printk-safe preempted by
|
||||
- * NMI-printk would have overwritten the NMI messages.
|
||||
- *
|
||||
- * The messages are flushed from irq work (or from panic()), possibly,
|
||||
- * from other CPU, concurrently with printk_safe_log_store(). Should this
|
||||
- * happen, printk_safe_log_store() will notice the buffer->len mismatch
|
||||
- * and repeat the write.
|
||||
- */
|
||||
-static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s,
|
||||
- const char *fmt, va_list args)
|
||||
-{
|
||||
- int add;
|
||||
- size_t len;
|
||||
- va_list ap;
|
||||
-
|
||||
-again:
|
||||
- len = atomic_read(&s->len);
|
||||
-
|
||||
- /* The trailing '\0' is not counted into len. */
|
||||
- if (len >= sizeof(s->buffer) - 1) {
|
||||
- atomic_inc(&s->message_lost);
|
||||
- queue_flush_work(s);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Make sure that all old data have been read before the buffer
|
||||
- * was reset. This is not needed when we just append data.
|
||||
- */
|
||||
- if (!len)
|
||||
- smp_rmb();
|
||||
-
|
||||
- va_copy(ap, args);
|
||||
- add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap);
|
||||
- va_end(ap);
|
||||
- if (!add)
|
||||
- return 0;
|
||||
-
|
||||
- /*
|
||||
- * Do it once again if the buffer has been flushed in the meantime.
|
||||
- * Note that atomic_cmpxchg() is an implicit memory barrier that
|
||||
- * makes sure that the data were written before updating s->len.
|
||||
- */
|
||||
- if (atomic_cmpxchg(&s->len, len, len + add) != len)
|
||||
- goto again;
|
||||
-
|
||||
- queue_flush_work(s);
|
||||
- return add;
|
||||
-}
|
||||
-
|
||||
-static inline void printk_safe_flush_line(const char *text, int len)
|
||||
-{
|
||||
- /*
|
||||
- * Avoid any console drivers calls from here, because we may be
|
||||
- * in NMI or printk_safe context (when in panic). The messages
|
||||
- * must go only into the ring buffer at this stage. Consoles will
|
||||
- * get explicitly called later when a crashdump is not generated.
|
||||
- */
|
||||
- printk_deferred("%.*s", len, text);
|
||||
-}
|
||||
-
|
||||
-/* printk part of the temporary buffer line by line */
|
||||
-static int printk_safe_flush_buffer(const char *start, size_t len)
|
||||
-{
|
||||
- const char *c, *end;
|
||||
- bool header;
|
||||
-
|
||||
- c = start;
|
||||
- end = start + len;
|
||||
- header = true;
|
||||
-
|
||||
- /* Print line by line. */
|
||||
- while (c < end) {
|
||||
- if (*c == '\n') {
|
||||
- printk_safe_flush_line(start, c - start + 1);
|
||||
- start = ++c;
|
||||
- header = true;
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- /* Handle continuous lines or missing new line. */
|
||||
- if ((c + 1 < end) && printk_get_level(c)) {
|
||||
- if (header) {
|
||||
- c = printk_skip_level(c);
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- printk_safe_flush_line(start, c - start);
|
||||
- start = c++;
|
||||
- header = true;
|
||||
- continue;
|
||||
- }
|
||||
-
|
||||
- header = false;
|
||||
- c++;
|
||||
- }
|
||||
-
|
||||
- /* Check if there was a partial line. Ignore pure header. */
|
||||
- if (start < end && !header) {
|
||||
- static const char newline[] = KERN_CONT "\n";
|
||||
-
|
||||
- printk_safe_flush_line(start, end - start);
|
||||
- printk_safe_flush_line(newline, strlen(newline));
|
||||
- }
|
||||
-
|
||||
- return len;
|
||||
-}
|
||||
-
|
||||
-static void report_message_lost(struct printk_safe_seq_buf *s)
|
||||
-{
|
||||
- int lost = atomic_xchg(&s->message_lost, 0);
|
||||
-
|
||||
- if (lost)
|
||||
- printk_deferred("Lost %d message(s)!\n", lost);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Flush data from the associated per-CPU buffer. The function
|
||||
- * can be called either via IRQ work or independently.
|
||||
- */
|
||||
-static void __printk_safe_flush(struct irq_work *work)
|
||||
-{
|
||||
- static raw_spinlock_t read_lock =
|
||||
- __RAW_SPIN_LOCK_INITIALIZER(read_lock);
|
||||
- struct printk_safe_seq_buf *s =
|
||||
- container_of(work, struct printk_safe_seq_buf, work);
|
||||
- unsigned long flags;
|
||||
- size_t len;
|
||||
- int i;
|
||||
-
|
||||
- /*
|
||||
- * The lock has two functions. First, one reader has to flush all
|
||||
- * available message to make the lockless synchronization with
|
||||
- * writers easier. Second, we do not want to mix messages from
|
||||
- * different CPUs. This is especially important when printing
|
||||
- * a backtrace.
|
||||
- */
|
||||
- raw_spin_lock_irqsave(&read_lock, flags);
|
||||
-
|
||||
- i = 0;
|
||||
-more:
|
||||
- len = atomic_read(&s->len);
|
||||
-
|
||||
- /*
|
||||
- * This is just a paranoid check that nobody has manipulated
|
||||
- * the buffer an unexpected way. If we printed something then
|
||||
- * @len must only increase. Also it should never overflow the
|
||||
- * buffer size.
|
||||
- */
|
||||
- if ((i && i >= len) || len > sizeof(s->buffer)) {
|
||||
- const char *msg = "printk_safe_flush: internal error\n";
|
||||
-
|
||||
- printk_safe_flush_line(msg, strlen(msg));
|
||||
- len = 0;
|
||||
- }
|
||||
-
|
||||
- if (!len)
|
||||
- goto out; /* Someone else has already flushed the buffer. */
|
||||
-
|
||||
- /* Make sure that data has been written up to the @len */
|
||||
- smp_rmb();
|
||||
- i += printk_safe_flush_buffer(s->buffer + i, len - i);
|
||||
-
|
||||
- /*
|
||||
- * Check that nothing has got added in the meantime and truncate
|
||||
- * the buffer. Note that atomic_cmpxchg() is an implicit memory
|
||||
- * barrier that makes sure that the data were copied before
|
||||
- * updating s->len.
|
||||
- */
|
||||
- if (atomic_cmpxchg(&s->len, len, 0) != len)
|
||||
- goto more;
|
||||
-
|
||||
-out:
|
||||
- report_message_lost(s);
|
||||
- raw_spin_unlock_irqrestore(&read_lock, flags);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * printk_safe_flush - flush all per-cpu nmi buffers.
|
||||
- *
|
||||
- * The buffers are flushed automatically via IRQ work. This function
|
||||
- * is useful only when someone wants to be sure that all buffers have
|
||||
- * been flushed at some point.
|
||||
- */
|
||||
-void printk_safe_flush(void)
|
||||
-{
|
||||
- int cpu;
|
||||
-
|
||||
- for_each_possible_cpu(cpu) {
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
- __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work);
|
||||
-#endif
|
||||
- __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work);
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system
|
||||
- * goes down.
|
||||
- *
|
||||
- * Similar to printk_safe_flush() but it can be called even in NMI context when
|
||||
- * the system goes down. It does the best effort to get NMI messages into
|
||||
- * the main ring buffer.
|
||||
- *
|
||||
- * Note that it could try harder when there is only one CPU online.
|
||||
- */
|
||||
-void printk_safe_flush_on_panic(void)
|
||||
-{
|
||||
- /*
|
||||
- * Make sure that we could access the main ring buffer.
|
||||
- * Do not risk a double release when more CPUs are up.
|
||||
- */
|
||||
- if (raw_spin_is_locked(&logbuf_lock)) {
|
||||
- if (num_online_cpus() > 1)
|
||||
- return;
|
||||
-
|
||||
- debug_locks_off();
|
||||
- raw_spin_lock_init(&logbuf_lock);
|
||||
- }
|
||||
-
|
||||
- printk_safe_flush();
|
||||
-}
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
-/*
|
||||
- * Safe printk() for NMI context. It uses a per-CPU buffer to
|
||||
- * store the message. NMIs are not nested, so there is always only
|
||||
- * one writer running. But the buffer might get flushed from another
|
||||
- * CPU, so we need to be careful.
|
||||
- */
|
||||
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
|
||||
-{
|
||||
- struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
|
||||
-
|
||||
- return printk_safe_log_store(s, fmt, args);
|
||||
-}
|
||||
-
|
||||
-void notrace printk_nmi_enter(void)
|
||||
-{
|
||||
- this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-void notrace printk_nmi_exit(void)
|
||||
-{
|
||||
- this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Marks a code that might produce many messages in NMI context
|
||||
- * and the risk of losing them is more critical than eventual
|
||||
- * reordering.
|
||||
- *
|
||||
- * It has effect only when called in NMI context. Then printk()
|
||||
- * will try to store the messages into the main logbuf directly
|
||||
- * and use the per-CPU buffers only as a fallback when the lock
|
||||
- * is not available.
|
||||
- */
|
||||
-void printk_nmi_direct_enter(void)
|
||||
-{
|
||||
- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
|
||||
- this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-void printk_nmi_direct_exit(void)
|
||||
-{
|
||||
- this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
|
||||
-}
|
||||
-
|
||||
-#else
|
||||
-
|
||||
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
|
||||
-{
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-#endif /* CONFIG_PRINTK_NMI */
|
||||
-
|
||||
-/*
|
||||
- * Lock-less printk(), to avoid deadlocks should the printk() recurse
|
||||
- * into itself. It uses a per-CPU buffer to store the message, just like
|
||||
- * NMI.
|
||||
- */
|
||||
-static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args)
|
||||
-{
|
||||
- struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq);
|
||||
-
|
||||
- return printk_safe_log_store(s, fmt, args);
|
||||
-}
|
||||
-
|
||||
-/* Can be preempted by NMI. */
|
||||
-void __printk_safe_enter(void)
|
||||
-{
|
||||
- this_cpu_inc(printk_context);
|
||||
-}
|
||||
-
|
||||
-/* Can be preempted by NMI. */
|
||||
-void __printk_safe_exit(void)
|
||||
-{
|
||||
- this_cpu_dec(printk_context);
|
||||
-}
|
||||
-
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
|
||||
-{
|
||||
- /*
|
||||
- * Try to use the main logbuf even in NMI. But avoid calling console
|
||||
- * drivers that might have their own locks.
|
||||
- */
|
||||
- if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
|
||||
- raw_spin_trylock(&logbuf_lock)) {
|
||||
- int len;
|
||||
-
|
||||
- len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
- defer_console_output();
|
||||
- return len;
|
||||
- }
|
||||
-
|
||||
- /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
|
||||
- if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
|
||||
- return vprintk_nmi(fmt, args);
|
||||
-
|
||||
- /* Use extra buffer to prevent a recursion deadlock in safe mode. */
|
||||
- if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
|
||||
- return vprintk_safe(fmt, args);
|
||||
-
|
||||
- /* No obstacles. */
|
||||
- return vprintk_default(fmt, args);
|
||||
-}
|
||||
-
|
||||
-void __init printk_safe_init(void)
|
||||
-{
|
||||
- int cpu;
|
||||
-
|
||||
- for_each_possible_cpu(cpu) {
|
||||
- struct printk_safe_seq_buf *s;
|
||||
-
|
||||
- s = &per_cpu(safe_print_seq, cpu);
|
||||
- init_irq_work(&s->work, __printk_safe_flush);
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK_NMI
|
||||
- s = &per_cpu(nmi_print_seq, cpu);
|
||||
- init_irq_work(&s->work, __printk_safe_flush);
|
||||
-#endif
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * In the highly unlikely event that a NMI were to trigger at
|
||||
- * this moment. Make sure IRQ work is set up before this
|
||||
- * variable is set.
|
||||
- */
|
||||
- barrier();
|
||||
- printk_safe_irq_ready = 1;
|
||||
-
|
||||
- /* Flush pending messages that did not have scheduled IRQ works. */
|
||||
- printk_safe_flush();
|
||||
-}
|
||||
--- a/kernel/trace/trace.c
|
||||
+++ b/kernel/trace/trace.c
|
||||
@@ -8929,7 +8929,6 @@ void ftrace_dump(enum ftrace_dump_mode o
|
||||
tracing_off();
|
||||
|
||||
local_irq_save(flags);
|
||||
- printk_nmi_direct_enter();
|
||||
|
||||
/* Simulate the iterator */
|
||||
trace_init_global_iter(&iter);
|
||||
@@ -9006,7 +9005,6 @@ void ftrace_dump(enum ftrace_dump_mode o
|
||||
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
|
||||
}
|
||||
atomic_dec(&dump_running);
|
||||
- printk_nmi_direct_exit();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ftrace_dump);
|
||||
--- a/lib/nmi_backtrace.c
|
||||
+++ b/lib/nmi_backtrace.c
|
||||
@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const
|
||||
touch_softlockup_watchdog();
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Force flush any remote buffers that might be stuck in IRQ context
|
||||
- * and therefore could not run their irq_work.
|
||||
- */
|
||||
- printk_safe_flush();
|
||||
-
|
||||
clear_bit_unlock(0, &backtrace_flag);
|
||||
put_cpu();
|
||||
}
|
@ -0,0 +1,329 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:50 +0100
Subject: [PATCH 12/25] printk: minimize console locking implementation

Since printing of the printk buffer is now handled by the printk
kthread, minimize the console locking functions to just handle
locking of the console.

NOTE: With this console_flush_on_panic will no longer flush.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 255 -------------------------------------------------
 1 file changed, 1 insertion(+), 254 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -227,19 +227,7 @@ static int nr_ext_console_drivers;
|
||||
|
||||
static int __down_trylock_console_sem(unsigned long ip)
|
||||
{
|
||||
- int lock_failed;
|
||||
- unsigned long flags;
|
||||
-
|
||||
- /*
|
||||
- * Here and in __up_console_sem() we need to be in safe mode,
|
||||
- * because spindump/WARN/etc from under console ->lock will
|
||||
- * deadlock in printk()->down_trylock_console_sem() otherwise.
|
||||
- */
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- lock_failed = down_trylock(&console_sem);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
- if (lock_failed)
|
||||
+ if (down_trylock(&console_sem))
|
||||
return 1;
|
||||
mutex_acquire(&console_lock_dep_map, 0, 1, ip);
|
||||
return 0;
|
||||
@@ -248,13 +236,9 @@ static int __down_trylock_console_sem(un
|
||||
|
||||
static void __up_console_sem(unsigned long ip)
|
||||
{
|
||||
- unsigned long flags;
|
||||
-
|
||||
mutex_release(&console_lock_dep_map, 1, ip);
|
||||
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
up(&console_sem);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
}
|
||||
#define up_console_sem() __up_console_sem(_RET_IP_)
|
||||
|
||||
@@ -1552,82 +1536,6 @@ static void format_text(struct printk_lo
|
||||
}
|
||||
|
||||
/*
|
||||
- * Special console_lock variants that help to reduce the risk of soft-lockups.
|
||||
- * They allow to pass console_lock to another printk() call using a busy wait.
|
||||
- */
|
||||
-
|
||||
-#ifdef CONFIG_LOCKDEP
|
||||
-static struct lockdep_map console_owner_dep_map = {
|
||||
- .name = "console_owner"
|
||||
-};
|
||||
-#endif
|
||||
-
|
||||
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
|
||||
-static struct task_struct *console_owner;
|
||||
-static bool console_waiter;
|
||||
-
|
||||
-/**
|
||||
- * console_lock_spinning_enable - mark beginning of code where another
|
||||
- * thread might safely busy wait
|
||||
- *
|
||||
- * This basically converts console_lock into a spinlock. This marks
|
||||
- * the section where the console_lock owner can not sleep, because
|
||||
- * there may be a waiter spinning (like a spinlock). Also it must be
|
||||
- * ready to hand over the lock at the end of the section.
|
||||
- */
|
||||
-static void console_lock_spinning_enable(void)
|
||||
-{
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- console_owner = current;
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- /* The waiter may spin on us after setting console_owner */
|
||||
- spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
- * console_lock_spinning_disable_and_check - mark end of code where another
|
||||
- * thread was able to busy wait and check if there is a waiter
|
||||
- *
|
||||
- * This is called at the end of the section where spinning is allowed.
|
||||
- * It has two functions. First, it is a signal that it is no longer
|
||||
- * safe to start busy waiting for the lock. Second, it checks if
|
||||
- * there is a busy waiter and passes the lock rights to her.
|
||||
- *
|
||||
- * Important: Callers lose the lock if there was a busy waiter.
|
||||
- * They must not touch items synchronized by console_lock
|
||||
- * in this case.
|
||||
- *
|
||||
- * Return: 1 if the lock rights were passed, 0 otherwise.
|
||||
- */
|
||||
-static int console_lock_spinning_disable_and_check(void)
|
||||
-{
|
||||
- int waiter;
|
||||
-
|
||||
- raw_spin_lock(&console_owner_lock);
|
||||
- waiter = READ_ONCE(console_waiter);
|
||||
- console_owner = NULL;
|
||||
- raw_spin_unlock(&console_owner_lock);
|
||||
-
|
||||
- if (!waiter) {
|
||||
- spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
- return 0;
|
||||
- }
|
||||
-
|
||||
- /* The waiter is now free to continue */
|
||||
- WRITE_ONCE(console_waiter, false);
|
||||
-
|
||||
- spin_release(&console_owner_dep_map, 1, _THIS_IP_);
|
||||
-
|
||||
- /*
|
||||
- * Hand off console_lock to waiter. The waiter will perform
|
||||
- * the up(). After this, the waiter is the console_lock owner.
|
||||
- */
|
||||
- mutex_release(&console_lock_dep_map, 1, _THIS_IP_);
|
||||
- return 1;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
* Call the console drivers, asking them to write out
|
||||
* log_buf[start] to log_buf[end - 1].
|
||||
* The console_lock must be held.
|
||||
@@ -1889,8 +1797,6 @@ static ssize_t msg_print_ext_header(char
|
||||
static ssize_t msg_print_ext_body(char *buf, size_t size,
|
||||
char *dict, size_t dict_len,
|
||||
char *text, size_t text_len) { return 0; }
|
||||
-static void console_lock_spinning_enable(void) { }
|
||||
-static int console_lock_spinning_disable_and_check(void) { return 0; }
|
||||
static void call_console_drivers(const char *ext_text, size_t ext_len,
|
||||
const char *text, size_t len) {}
|
||||
static size_t msg_print_text(const struct printk_log *msg, bool syslog,
|
||||
@@ -2125,35 +2031,6 @@ int is_console_locked(void)
|
||||
{
|
||||
return console_locked;
|
||||
}
|
||||
-EXPORT_SYMBOL(is_console_locked);
|
||||
-
|
||||
-/*
|
||||
- * Check if we have any console that is capable of printing while cpu is
|
||||
- * booting or shutting down. Requires console_sem.
|
||||
- */
|
||||
-static int have_callable_console(void)
|
||||
-{
|
||||
- struct console *con;
|
||||
-
|
||||
- for_each_console(con)
|
||||
- if ((con->flags & CON_ENABLED) &&
|
||||
- (con->flags & CON_ANYTIME))
|
||||
- return 1;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/*
|
||||
- * Can we actually use the console at this time on this cpu?
|
||||
- *
|
||||
- * Console drivers may assume that per-cpu resources have been allocated. So
|
||||
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
|
||||
- * call them until this CPU is officially up.
|
||||
- */
|
||||
-static inline int can_use_console(void)
|
||||
-{
|
||||
- return cpu_online(raw_smp_processor_id()) || have_callable_console();
|
||||
-}
|
||||
|
||||
/**
|
||||
* console_unlock - unlock the console system
|
||||
@@ -2161,147 +2038,17 @@ static inline int can_use_console(void)
|
||||
* Releases the console_lock which the caller holds on the console system
|
||||
* and the console driver list.
|
||||
*
|
||||
- * While the console_lock was held, console output may have been buffered
|
||||
- * by printk(). If this is the case, console_unlock(); emits
|
||||
- * the output prior to releasing the lock.
|
||||
- *
|
||||
- * If there is output waiting, we wake /dev/kmsg and syslog() users.
|
||||
- *
|
||||
* console_unlock(); may be called from any context.
|
||||
*/
|
||||
void console_unlock(void)
|
||||
{
|
||||
- static char ext_text[CONSOLE_EXT_LOG_MAX];
|
||||
- static char text[LOG_LINE_MAX + PREFIX_MAX];
|
||||
- unsigned long flags;
|
||||
- bool do_cond_resched, retry;
|
||||
-
|
||||
if (console_suspended) {
|
||||
up_console_sem();
|
||||
return;
|
||||
}
|
||||
|
||||
- /*
|
||||
- * Console drivers are called with interrupts disabled, so
|
||||
- * @console_may_schedule should be cleared before; however, we may
|
||||
- * end up dumping a lot of lines, for example, if called from
|
||||
- * console registration path, and should invoke cond_resched()
|
||||
- * between lines if allowable. Not doing so can cause a very long
|
||||
- * scheduling stall on a slow console leading to RCU stall and
|
||||
- * softlockup warnings which exacerbate the issue with more
|
||||
- * messages practically incapacitating the system.
|
||||
- *
|
||||
- * console_trylock() is not able to detect the preemptive
|
||||
- * context reliably. Therefore the value must be stored before
|
||||
- * and cleared after the the "again" goto label.
|
||||
- */
|
||||
- do_cond_resched = console_may_schedule;
|
||||
-again:
|
||||
- console_may_schedule = 0;
|
||||
-
|
||||
- /*
|
||||
- * We released the console_sem lock, so we need to recheck if
|
||||
- * cpu is online and (if not) is there at least one CON_ANYTIME
|
||||
- * console.
|
||||
- */
|
||||
- if (!can_use_console()) {
|
||||
- console_locked = 0;
|
||||
- up_console_sem();
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- for (;;) {
|
||||
- struct printk_log *msg;
|
||||
- size_t ext_len = 0;
|
||||
- size_t len;
|
||||
-
|
||||
- printk_safe_enter_irqsave(flags);
|
||||
- raw_spin_lock(&logbuf_lock);
|
||||
- if (console_seq < log_first_seq) {
|
||||
- len = sprintf(text,
|
||||
- "** %llu printk messages dropped **\n",
|
||||
- log_first_seq - console_seq);
|
||||
-
|
||||
- /* messages are gone, move to first one */
|
||||
- console_seq = log_first_seq;
|
||||
- console_idx = log_first_idx;
|
||||
- } else {
|
||||
- len = 0;
|
||||
- }
|
||||
-skip:
|
||||
- if (console_seq == log_next_seq)
|
||||
- break;
|
||||
-
|
||||
- msg = log_from_idx(console_idx);
|
||||
- if (suppress_message_printing(msg->level)) {
|
||||
- /*
|
||||
- * Skip record we have buffered and already printed
|
||||
- * directly to the console when we received it, and
|
||||
- * record that has level above the console loglevel.
|
||||
- */
|
||||
- console_idx = log_next(console_idx);
|
||||
- console_seq++;
|
||||
- goto skip;
|
||||
- }
|
||||
-
|
||||
- len += msg_print_text(msg,
|
||||
- console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
- printk_time, text + len, sizeof(text) - len);
|
||||
- if (nr_ext_console_drivers) {
|
||||
- ext_len = msg_print_ext_header(ext_text,
|
||||
- sizeof(ext_text),
|
||||
- msg, console_seq);
|
||||
- ext_len += msg_print_ext_body(ext_text + ext_len,
|
||||
- sizeof(ext_text) - ext_len,
|
||||
- log_dict(msg), msg->dict_len,
|
||||
- log_text(msg), msg->text_len);
|
||||
- }
|
||||
- console_idx = log_next(console_idx);
|
||||
- console_seq++;
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
-
|
||||
- /*
|
||||
- * While actively printing out messages, if another printk()
|
||||
- * were to occur on another CPU, it may wait for this one to
|
||||
- * finish. This task can not be preempted if there is a
|
||||
- * waiter waiting to take over.
|
||||
- */
|
||||
- console_lock_spinning_enable();
|
||||
-
|
||||
- stop_critical_timings(); /* don't trace print latency */
|
||||
- //call_console_drivers(ext_text, ext_len, text, len);
|
||||
- start_critical_timings();
|
||||
-
|
||||
- if (console_lock_spinning_disable_and_check()) {
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
- if (do_cond_resched)
|
||||
- cond_resched();
|
||||
- }
|
||||
-
|
||||
console_locked = 0;
|
||||
-
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
-
|
||||
up_console_sem();
|
||||
-
|
||||
- /*
|
||||
- * Someone could have filled up the buffer again, so re-check if there's
|
||||
- * something to flush. In case we cannot trylock the console_sem again,
|
||||
- * there's a new owner and the console_unlock() from them will do the
|
||||
- * flush, no worries.
|
||||
- */
|
||||
- raw_spin_lock(&logbuf_lock);
|
||||
- retry = console_seq != log_next_seq;
|
||||
- raw_spin_unlock(&logbuf_lock);
|
||||
- printk_safe_exit_irqrestore(flags);
|
||||
-
|
||||
- if (retry && console_trylock())
|
||||
- goto again;
|
||||
}
|
||||
EXPORT_SYMBOL(console_unlock);
|
||||
|
@ -0,0 +1,92 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:51 +0100
Subject: [PATCH 13/25] printk: track seq per console

Allow each console to track which seq record was last printed. This
simplifies identifying dropped records.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/console.h | 1 +
 kernel/printk/printk.c | 30 +++++++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)
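
To make the accounting concrete (sample numbers, not from a real boot): if a
console's printk_seq is 100 and the record being handed out has seq 105,
records 101-104 were never delivered to that console, so a
"** 4 printk messages dropped **" line is written first and printk_seq is
advanced to 105 before the new record is printed.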
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -153,6 +153,7 @@ struct console {
|
||||
short flags;
|
||||
short index;
|
||||
int cflag;
|
||||
+ unsigned long printk_seq;
|
||||
void *data;
|
||||
struct console *next;
|
||||
};
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1507,6 +1507,16 @@ SYSCALL_DEFINE3(syslog, int, type, char
|
||||
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
|
||||
}
|
||||
|
||||
+static void print_console_dropped(struct console *con, u64 count)
|
||||
+{
|
||||
+ char text[64];
|
||||
+ int len;
|
||||
+
|
||||
+ len = sprintf(text, "** %llu printk message%s dropped **\n",
|
||||
+ count, count > 1 ? "s" : "");
|
||||
+ con->write(con, text, len);
|
||||
+}
|
||||
+
|
||||
static void format_text(struct printk_log *msg, u64 seq,
|
||||
char *ext_text, size_t *ext_len,
|
||||
char *text, size_t *len, bool time)
|
||||
@@ -1540,7 +1550,7 @@ static void format_text(struct printk_lo
|
||||
* log_buf[start] to log_buf[end - 1].
|
||||
* The console_lock must be held.
|
||||
*/
|
||||
-static void call_console_drivers(const char *ext_text, size_t ext_len,
|
||||
+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
const char *text, size_t len)
|
||||
{
|
||||
struct console *con;
|
||||
@@ -1558,6 +1568,19 @@ static void call_console_drivers(const c
|
||||
if (!cpu_online(raw_smp_processor_id()) &&
|
||||
!(con->flags & CON_ANYTIME))
|
||||
continue;
|
||||
+ if (con->printk_seq >= seq)
|
||||
+ continue;
|
||||
+
|
||||
+ con->printk_seq++;
|
||||
+ if (con->printk_seq < seq) {
|
||||
+ print_console_dropped(con, seq - con->printk_seq);
|
||||
+ con->printk_seq = seq;
|
||||
+ }
|
||||
+
|
||||
+ /* for supressed messages, only seq is updated */
|
||||
+ if (len == 0 && ext_len == 0)
|
||||
+ continue;
|
||||
+
|
||||
if (con->flags & CON_EXTENDED)
|
||||
con->write(con, ext_text, ext_len);
|
||||
else
|
||||
@@ -1797,7 +1820,7 @@ static ssize_t msg_print_ext_header(char
|
||||
static ssize_t msg_print_ext_body(char *buf, size_t size,
|
||||
char *dict, size_t dict_len,
|
||||
char *text, size_t text_len) { return 0; }
|
||||
-static void call_console_drivers(const char *ext_text, size_t ext_len,
|
||||
+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
const char *text, size_t len) {}
|
||||
static size_t msg_print_text(const struct printk_log *msg, bool syslog,
|
||||
bool time, char *buf, size_t size) { return 0; }
|
||||
@@ -2550,8 +2573,9 @@ static int printk_kthread_func(void *dat
|
||||
&len, printk_time);
|
||||
|
||||
console_lock();
|
||||
+ call_console_drivers(master_seq, ext_text,
|
||||
+ ext_len, text, len);
|
||||
if (len > 0 || ext_len > 0) {
|
||||
- call_console_drivers(ext_text, ext_len, text, len);
|
||||
boot_delay_msec(msg->level);
|
||||
printk_delay();
|
||||
}
|
@ -0,0 +1,71 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:52 +0100
Subject: [PATCH 14/25] printk: do boot_delay_msec inside printk_delay

Both functions needed to be called one after the other, so just
integrate boot_delay_msec into printk_delay for simplification.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1507,6 +1507,21 @@ SYSCALL_DEFINE3(syslog, int, type, char
|
||||
return do_syslog(type, buf, len, SYSLOG_FROM_READER);
|
||||
}
|
||||
|
||||
+int printk_delay_msec __read_mostly;
|
||||
+
|
||||
+static inline void printk_delay(int level)
|
||||
+{
|
||||
+ boot_delay_msec(level);
|
||||
+ if (unlikely(printk_delay_msec)) {
|
||||
+ int m = printk_delay_msec;
|
||||
+
|
||||
+ while (m--) {
|
||||
+ mdelay(1);
|
||||
+ touch_nmi_watchdog();
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static void print_console_dropped(struct console *con, u64 count)
|
||||
{
|
||||
char text[64];
|
||||
@@ -1588,20 +1603,6 @@ static void call_console_drivers(u64 seq
|
||||
}
|
||||
}
|
||||
|
||||
-int printk_delay_msec __read_mostly;
|
||||
-
|
||||
-static inline void printk_delay(void)
|
||||
-{
|
||||
- if (unlikely(printk_delay_msec)) {
|
||||
- int m = printk_delay_msec;
|
||||
-
|
||||
- while (m--) {
|
||||
- mdelay(1);
|
||||
- touch_nmi_watchdog();
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
static inline u32 printk_caller_id(void)
|
||||
{
|
||||
return in_task() ? task_pid_nr(current) :
|
||||
@@ -2575,10 +2576,8 @@ static int printk_kthread_func(void *dat
|
||||
console_lock();
|
||||
call_console_drivers(master_seq, ext_text,
|
||||
ext_len, text, len);
|
||||
- if (len > 0 || ext_len > 0) {
|
||||
- boot_delay_msec(msg->level);
|
||||
- printk_delay();
|
||||
- }
|
||||
+ if (len > 0 || ext_len > 0)
|
||||
+ printk_delay(msg->level);
|
||||
console_unlock();
|
||||
}
|
||||
|
@ -0,0 +1,118 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:53 +0100
Subject: [PATCH 15/25] printk: print history for new consoles

When new consoles register, they currently print how many messages
they have missed. However, many (or all) of those messages may still
be in the ring buffer. Add functionality to print as much of the
history as available. This is a clean replacement of the old
exclusive console hack.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/console.h | 1
 kernel/printk/printk.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -154,6 +154,7 @@ struct console {
|
||||
short index;
|
||||
int cflag;
|
||||
unsigned long printk_seq;
|
||||
+ int wrote_history;
|
||||
void *data;
|
||||
struct console *next;
|
||||
};
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1560,6 +1560,77 @@ static void format_text(struct printk_lo
|
||||
}
|
||||
}
|
||||
|
||||
+static void printk_write_history(struct console *con, u64 master_seq)
|
||||
+{
|
||||
+ struct prb_iterator iter;
|
||||
+ bool time = printk_time;
|
||||
+ static char *ext_text;
|
||||
+ static char *text;
|
||||
+ static char *buf;
|
||||
+ u64 seq;
|
||||
+
|
||||
+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
|
||||
+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
|
||||
+ buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
|
||||
+ if (!ext_text || !text || !buf)
|
||||
+ return;
|
||||
+
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ goto out;
|
||||
+
|
||||
+ if (!con->write)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (!cpu_online(raw_smp_processor_id()) &&
|
||||
+ !(con->flags & CON_ANYTIME))
|
||||
+ goto out;
|
||||
+
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+
|
||||
+ for (;;) {
|
||||
+ struct printk_log *msg;
|
||||
+ size_t ext_len;
|
||||
+ size_t len;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = prb_iter_next(&iter, buf, PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (seq > master_seq)
|
||||
+ break;
|
||||
+
|
||||
+ con->printk_seq++;
|
||||
+ if (con->printk_seq < seq) {
|
||||
+ print_console_dropped(con, seq - con->printk_seq);
|
||||
+ con->printk_seq = seq;
|
||||
+ }
|
||||
+
|
||||
+ msg = (struct printk_log *)buf;
|
||||
+ format_text(msg, master_seq, ext_text, &ext_len, text,
|
||||
+ &len, time);
|
||||
+
|
||||
+ if (len == 0 && ext_len == 0)
|
||||
+ continue;
|
||||
+
|
||||
+ if (con->flags & CON_EXTENDED)
|
||||
+ con->write(con, ext_text, ext_len);
|
||||
+ else
|
||||
+ con->write(con, text, len);
|
||||
+
|
||||
+ printk_delay(msg->level);
|
||||
+ }
|
||||
+out:
|
||||
+ con->wrote_history = 1;
|
||||
+ kfree(ext_text);
|
||||
+ kfree(text);
|
||||
+ kfree(buf);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Call the console drivers, asking them to write out
|
||||
* log_buf[start] to log_buf[end - 1].
|
||||
@@ -1578,6 +1649,10 @@ static void call_console_drivers(u64 seq
|
||||
for_each_console(con) {
|
||||
if (!(con->flags & CON_ENABLED))
|
||||
continue;
|
||||
+ if (!con->wrote_history) {
|
||||
+ printk_write_history(con, seq);
|
||||
+ continue;
|
||||
+ }
|
||||
if (!con->write)
|
||||
continue;
|
||||
if (!cpu_online(raw_smp_processor_id()) &&
|
@ -0,0 +1,91 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:54 +0100
Subject: [PATCH 16/25] printk: implement CON_PRINTBUFFER

If the CON_PRINTBUFFER flag is not set, do not replay the history
for that console.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -419,10 +419,6 @@ static u32 log_first_idx;
|
||||
static u64 log_next_seq;
|
||||
static u32 log_next_idx;
|
||||
|
||||
-/* the next printk record to write to the console */
|
||||
-static u64 console_seq;
|
||||
-static u32 console_idx;
|
||||
-
|
||||
/* the next printk record to read after the last 'clear' command */
|
||||
static u64 clear_seq;
|
||||
static u32 clear_idx;
|
||||
@@ -1650,8 +1646,12 @@ static void call_console_drivers(u64 seq
|
||||
if (!(con->flags & CON_ENABLED))
|
||||
continue;
|
||||
if (!con->wrote_history) {
|
||||
- printk_write_history(con, seq);
|
||||
- continue;
|
||||
+ if (con->flags & CON_PRINTBUFFER) {
|
||||
+ printk_write_history(con, seq);
|
||||
+ continue;
|
||||
+ }
|
||||
+ con->wrote_history = 1;
|
||||
+ con->printk_seq = seq - 1;
|
||||
}
|
||||
if (!con->write)
|
||||
continue;
|
||||
@@ -1881,8 +1881,6 @@ EXPORT_SYMBOL(printk);
|
||||
|
||||
static u64 syslog_seq;
|
||||
static u32 syslog_idx;
|
||||
-static u64 console_seq;
|
||||
-static u32 console_idx;
|
||||
static u64 log_first_seq;
|
||||
static u32 log_first_idx;
|
||||
static u64 log_next_seq;
|
||||
@@ -2206,15 +2204,6 @@ void console_flush_on_panic(enum con_flu
|
||||
*/
|
||||
console_trylock();
|
||||
console_may_schedule = 0;
|
||||
-
|
||||
- if (mode == CONSOLE_REPLAY_ALL) {
|
||||
- unsigned long flags;
|
||||
-
|
||||
- logbuf_lock_irqsave(flags);
|
||||
- console_seq = log_first_seq;
|
||||
- console_idx = log_first_idx;
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
- }
|
||||
console_unlock();
|
||||
}
|
||||
|
||||
@@ -2293,7 +2282,6 @@ early_param("keep_bootcon", keep_bootcon
|
||||
void register_console(struct console *newcon)
|
||||
{
|
||||
int i;
|
||||
- unsigned long flags;
|
||||
struct console *bcon = NULL;
|
||||
struct console_cmdline *c;
|
||||
static bool has_preferred;
|
||||
@@ -2409,16 +2397,6 @@ void register_console(struct console *ne
|
||||
if (newcon->flags & CON_EXTENDED)
|
||||
nr_ext_console_drivers++;
|
||||
|
||||
- if (newcon->flags & CON_PRINTBUFFER) {
|
||||
- /*
|
||||
- * console_unlock(); will print out the buffered messages
|
||||
- * for us.
|
||||
- */
|
||||
- logbuf_lock_irqsave(flags);
|
||||
- console_seq = syslog_seq;
|
||||
- console_idx = syslog_idx;
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
- }
|
||||
console_unlock();
|
||||
console_sysfs_notify();
|
||||
|
@ -0,0 +1,99 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:55 +0100
Subject: [PATCH 17/25] printk: add processor number to output

It can be difficult to sort out printk output if multiple processors
are printing simultaneously. Add the processor number to the printk
output to allow the messages to be sorted.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)
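
Illustrative effect of the change (sample values): the three-digit CPU number
appears after the timestamp in the human-readable prefix, and as a trailing
field in the /dev/kmsg extended header:

    [    1.234567] 001: example message emitted from CPU 1
    6,170,1234567,-,1;example message emitted from CPU 1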
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -348,6 +348,7 @@ enum log_flags {
|
||||
|
||||
struct printk_log {
|
||||
u64 ts_nsec; /* timestamp in nanoseconds */
|
||||
+ u16 cpu; /* cpu that generated record */
|
||||
u16 len; /* length of entire record */
|
||||
u16 text_len; /* length of text buffer */
|
||||
u16 dict_len; /* length of dictionary buffer */
|
||||
@@ -499,7 +500,7 @@ static u32 log_next(u32 idx)
|
||||
|
||||
/* insert record into the buffer, discard old ones, update heads */
|
||||
static int log_store(u32 caller_id, int facility, int level,
|
||||
- enum log_flags flags, u64 ts_nsec,
|
||||
+ enum log_flags flags, u64 ts_nsec, u16 cpu,
|
||||
const char *dict, u16 dict_len,
|
||||
const char *text, u16 text_len)
|
||||
{
|
||||
@@ -533,6 +534,7 @@ static int log_store(u32 caller_id, int
|
||||
#ifdef CONFIG_PRINTK_CALLER
|
||||
msg->caller_id = caller_id;
|
||||
#endif
|
||||
+ msg->cpu = cpu;
|
||||
msg->len = size;
|
||||
|
||||
/* insert message */
|
||||
@@ -606,9 +608,9 @@ static ssize_t msg_print_ext_header(char
|
||||
|
||||
do_div(ts_usec, 1000);
|
||||
|
||||
- return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
|
||||
+ return scnprintf(buf, size, "%u,%llu,%llu,%c%s,%hu;",
|
||||
(msg->facility << 3) | msg->level, seq, ts_usec,
|
||||
- msg->flags & LOG_CONT ? 'c' : '-', caller);
|
||||
+ msg->flags & LOG_CONT ? 'c' : '-', caller, msg->cpu);
|
||||
}
|
||||
|
||||
static ssize_t msg_print_ext_body(char *buf, size_t size,
|
||||
@@ -1142,6 +1144,11 @@ static inline void boot_delay_msec(int l
|
||||
static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME);
|
||||
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
|
||||
|
||||
+static size_t print_cpu(u16 cpu, char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "%03hu: ", cpu);
|
||||
+}
|
||||
+
|
||||
static size_t print_syslog(unsigned int level, char *buf)
|
||||
{
|
||||
return sprintf(buf, "<%u>", level);
|
||||
@@ -1185,6 +1192,7 @@ static size_t print_prefix(const struct
|
||||
buf[len++] = ' ';
|
||||
buf[len] = '\0';
|
||||
}
|
||||
+ len += print_cpu(msg->cpu, buf + len);
|
||||
|
||||
return len;
|
||||
}
|
||||
@@ -1760,6 +1768,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
u64 ts_nsec;
|
||||
char *text;
|
||||
char *rbuf;
|
||||
+ int cpu;
|
||||
|
||||
ts_nsec = local_clock();
|
||||
|
||||
@@ -1769,6 +1778,8 @@ asmlinkage int vprintk_emit(int facility
|
||||
return printed_len;
|
||||
}
|
||||
|
||||
+ cpu = raw_smp_processor_id();
|
||||
+
|
||||
text = rbuf;
|
||||
text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args);
|
||||
|
||||
@@ -1803,7 +1814,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
if (dict)
|
||||
lflags |= LOG_NEWLINE;
|
||||
|
||||
- printed_len = log_store(caller_id, facility, level, lflags, ts_nsec,
|
||||
+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu,
|
||||
dict, dictlen, text, text_len);
|
||||
|
||||
prb_commit(&h);
|
@ -0,0 +1,64 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:56 +0100
Subject: [PATCH 18/25] console: add write_atomic interface

Add a write_atomic callback to the console. This is an optional
function for console drivers. The function must be atomic (including
NMI safe) for writing to the console.

Console drivers must still implement the write callback. The
write_atomic callback will only be used for emergency messages.

Creating an NMI safe write_atomic that must synchronize with write
requires a careful implementation of the console driver. To aid with
the implementation, a set of console_atomic_* functions is provided:

    void console_atomic_lock(unsigned int *flags);
    void console_atomic_unlock(unsigned int flags);

These functions synchronize using the processor-reentrant cpu lock of
the printk buffer.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/console.h | 4 ++++
 kernel/printk/printk.c | 12 ++++++++++++
 2 files changed, 16 insertions(+)
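
A minimal sketch of the intended driver-side usage (the foo_* names and the
polled output helper are hypothetical; only struct console, write_atomic and
console_atomic_lock/unlock come from this series):

    /* sketch only: assumes the hardware has a busy-wait output primitive */
    static void foo_console_write_atomic(struct console *con,
                                         const char *s, unsigned int count)
    {
            unsigned int flags;

            console_atomic_lock(&flags);    /* NMI-safe, processor-reentrant */
            foo_hw_put_chars(s, count);     /* hypothetical polled TX */
            console_atomic_unlock(flags);
    }

    static struct console foo_console = {
            .name         = "foo",
            .write        = foo_console_write,        /* still mandatory */
            .write_atomic = foo_console_write_atomic, /* used for emergencies */
            .flags        = CON_PRINTBUFFER,
            .index        = -1,
    };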
|
||||
|
||||
--- a/include/linux/console.h
|
||||
+++ b/include/linux/console.h
|
||||
@@ -145,6 +145,7 @@ static inline int con_debug_leave(void)
|
||||
struct console {
|
||||
char name[16];
|
||||
void (*write)(struct console *, const char *, unsigned);
|
||||
+ void (*write_atomic)(struct console *, const char *, unsigned);
|
||||
int (*read)(struct console *, char *, unsigned);
|
||||
struct tty_driver *(*device)(struct console *, int *);
|
||||
void (*unblank)(void);
|
||||
@@ -236,4 +237,7 @@ extern void console_init(void);
|
||||
void dummycon_register_output_notifier(struct notifier_block *nb);
|
||||
void dummycon_unregister_output_notifier(struct notifier_block *nb);
|
||||
|
||||
+extern void console_atomic_lock(unsigned int *flags);
|
||||
+extern void console_atomic_unlock(unsigned int flags);
|
||||
+
|
||||
#endif /* _LINUX_CONSOLE_H */
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -3044,3 +3044,15 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
#endif
|
||||
+
|
||||
+void console_atomic_lock(unsigned int *flags)
|
||||
+{
|
||||
+ prb_lock(&printk_cpulock, flags);
|
||||
+}
|
||||
+EXPORT_SYMBOL(console_atomic_lock);
|
||||
+
|
||||
+void console_atomic_unlock(unsigned int flags)
|
||||
+{
|
||||
+ prb_unlock(&printk_cpulock, flags);
|
||||
+}
|
||||
+EXPORT_SYMBOL(console_atomic_unlock);
|
@ -0,0 +1,272 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:57 +0100
Subject: [PATCH 19/25] printk: introduce emergency messages

Console messages are generally either critical or non-critical.
Critical messages are messages such as crashes or sysrq output.
Critical messages should never be lost because generally they provide
important debugging information.

Since all console messages are output via a fully preemptible printk
kernel thread, it is possible that messages are not output because
that thread cannot be scheduled (BUG in scheduler, run-away RT task,
etc).

To allow critical messages to be output independent of the
schedulability of the printk task, introduce an emergency mechanism
that _immediately_ outputs the message to the consoles. To avoid
possible unbounded latency issues, the emergency mechanism only
outputs the printk line provided by the caller and ignores any
pending messages in the log buffer.

Critical messages are identified as messages (by default) with log
level LOGLEVEL_WARNING or more critical. This is configurable via the
kernel option CONSOLE_LOGLEVEL_EMERGENCY.

Any messages output as emergency messages are skipped by the printk
thread on those consoles that output the emergency message.

In order for a console driver to support emergency messages, the
write_atomic function must be implemented by the driver. If not
implemented, the emergency messages are handled like all other
messages and are printed by the printk thread.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/printk.h | 2
 kernel/printk/printk.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++---
 lib/Kconfig.debug | 17 +++++++
 3 files changed, 124 insertions(+), 6 deletions(-)
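
Condensed, the per-console gate added below amounts to the following check (a
restatement of console_can_emergency()/call_console_drivers(), not new code):

    /* a record is an emergency for this console when it is severe enough
     * and the console can write it atomically; it is then emitted right
     * away and skipped by the printk kthread later */
    bool emergency = con->write_atomic &&
                     level < emergency_console_loglevel;

With the default CONSOLE_LOGLEVEL_EMERGENCY of 5 this covers LOGLEVEL_WARNING
(4) and anything more severe, while pr_info() and lower-priority output keeps
going through the printk kthread.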
|
||||
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -58,6 +58,7 @@ static inline const char *printk_skip_he
|
||||
*/
|
||||
#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
|
||||
#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET
|
||||
+#define CONSOLE_LOGLEVEL_EMERGENCY CONFIG_CONSOLE_LOGLEVEL_EMERGENCY
|
||||
|
||||
extern int console_printk[];
|
||||
|
||||
@@ -65,6 +66,7 @@ extern int console_printk[];
|
||||
#define default_message_loglevel (console_printk[1])
|
||||
#define minimum_console_loglevel (console_printk[2])
|
||||
#define default_console_loglevel (console_printk[3])
|
||||
+#define emergency_console_loglevel (console_printk[4])
|
||||
|
||||
static inline void console_silent(void)
|
||||
{
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -46,6 +46,7 @@
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/kthread.h>
|
||||
+#include <linux/clocksource.h>
|
||||
#include <linux/printk_ringbuffer.h>
|
||||
#include <linux/sched/clock.h>
|
||||
#include <linux/sched/debug.h>
|
||||
@@ -62,11 +63,12 @@
|
||||
#include "braille.h"
|
||||
#include "internal.h"
|
||||
|
||||
-int console_printk[4] = {
|
||||
+int console_printk[5] = {
|
||||
CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
|
||||
MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */
|
||||
CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */
|
||||
CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */
|
||||
+ CONSOLE_LOGLEVEL_EMERGENCY, /* emergency_console_loglevel */
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(console_printk);
|
||||
|
||||
@@ -498,6 +500,9 @@ static u32 log_next(u32 idx)
|
||||
return idx + msg->len;
|
||||
}
|
||||
|
||||
+static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu,
|
||||
+ char *text, u16 text_len);
|
||||
+
|
||||
/* insert record into the buffer, discard old ones, update heads */
|
||||
static int log_store(u32 caller_id, int facility, int level,
|
||||
enum log_flags flags, u64 ts_nsec, u16 cpu,
|
||||
@@ -1641,7 +1646,7 @@ static void printk_write_history(struct
|
||||
* The console_lock must be held.
|
||||
*/
|
||||
static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len)
|
||||
+ const char *text, size_t len, int level)
|
||||
{
|
||||
struct console *con;
|
||||
|
||||
@@ -1661,6 +1666,18 @@ static void call_console_drivers(u64 seq
|
||||
con->wrote_history = 1;
|
||||
con->printk_seq = seq - 1;
|
||||
}
|
||||
+ if (con->write_atomic && level < emergency_console_loglevel) {
|
||||
+ /* skip emergency messages, already printed */
|
||||
+ if (con->printk_seq < seq)
|
||||
+ con->printk_seq = seq;
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (con->flags & CON_BOOT) {
|
||||
+ /* skip emergency messages, already printed */
|
||||
+ if (con->printk_seq < seq)
|
||||
+ con->printk_seq = seq;
|
||||
+ continue;
|
||||
+ }
|
||||
if (!con->write)
|
||||
continue;
|
||||
if (!cpu_online(raw_smp_processor_id()) &&
|
||||
@@ -1780,8 +1797,12 @@ asmlinkage int vprintk_emit(int facility
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
|
||||
- text = rbuf;
|
||||
- text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args);
|
||||
+ /*
|
||||
+ * If this turns out to be an emergency message, there
|
||||
+ * may need to be a prefix added. Leave room for it.
|
||||
+ */
|
||||
+ text = rbuf + PREFIX_MAX;
|
||||
+ text_len = vscnprintf(text, PRINTK_SPRINT_MAX - PREFIX_MAX, fmt, args);
|
||||
|
||||
/* strip and flag a trailing newline */
|
||||
if (text_len && text[text_len-1] == '\n') {
|
||||
@@ -1814,6 +1835,14 @@ asmlinkage int vprintk_emit(int facility
|
||||
if (dict)
|
||||
lflags |= LOG_NEWLINE;
|
||||
|
||||
+ /*
|
||||
+ * NOTE:
|
||||
+ * - rbuf points to beginning of allocated buffer
|
||||
+ * - text points to beginning of text
|
||||
+ * - there is room before text for prefix
|
||||
+ */
|
||||
+ printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len);
|
||||
+
|
||||
printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu,
|
||||
dict, dictlen, text, text_len);
|
||||
|
||||
@@ -1906,7 +1935,7 @@ static ssize_t msg_print_ext_body(char *
|
||||
char *dict, size_t dict_len,
|
||||
char *text, size_t text_len) { return 0; }
|
||||
static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len) {}
|
||||
+ const char *text, size_t len, int level) {}
|
||||
static size_t msg_print_text(const struct printk_log *msg, bool syslog,
|
||||
bool time, char *buf, size_t size) { return 0; }
|
||||
static bool suppress_message_printing(int level) { return false; }
|
||||
@@ -2639,7 +2668,7 @@ static int printk_kthread_func(void *dat
|
||||
|
||||
console_lock();
|
||||
call_console_drivers(master_seq, ext_text,
|
||||
- ext_len, text, len);
|
||||
+ ext_len, text, len, msg->level);
|
||||
if (len > 0 || ext_len > 0)
|
||||
printk_delay(msg->level);
|
||||
console_unlock();
|
||||
@@ -3043,6 +3072,76 @@ void kmsg_dump_rewind(struct kmsg_dumper
|
||||
logbuf_unlock_irqrestore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
+
|
||||
+static bool console_can_emergency(int level)
|
||||
+{
|
||||
+ struct console *con;
|
||||
+
|
||||
+ for_each_console(con) {
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ continue;
|
||||
+ if (con->write_atomic && level < emergency_console_loglevel)
|
||||
+ return true;
|
||||
+ if (con->write && (con->flags & CON_BOOT))
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static void call_emergency_console_drivers(int level, const char *text,
|
||||
+ size_t text_len)
|
||||
+{
|
||||
+ struct console *con;
|
||||
+
|
||||
+ for_each_console(con) {
|
||||
+ if (!(con->flags & CON_ENABLED))
|
||||
+ continue;
|
||||
+ if (con->write_atomic && level < emergency_console_loglevel) {
|
||||
+ con->write_atomic(con, text, text_len);
|
||||
+ continue;
|
||||
+ }
|
||||
+ if (con->write && (con->flags & CON_BOOT)) {
|
||||
+ con->write(con, text, text_len);
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu,
|
||||
+ char *text, u16 text_len)
|
||||
+{
|
||||
+ struct printk_log msg;
|
||||
+ size_t prefix_len;
|
||||
+
|
||||
+ if (!console_can_emergency(level))
|
||||
+ return;
|
||||
+
|
||||
+ msg.level = level;
|
||||
+ msg.ts_nsec = ts_nsec;
|
||||
+ msg.cpu = cpu;
|
||||
+ msg.facility = 0;
|
||||
+
|
||||
+ /* "text" must have PREFIX_MAX preceding bytes available */
|
||||
+
|
||||
+ prefix_len = print_prefix(&msg,
|
||||
+ console_msg_format & MSG_FORMAT_SYSLOG,
|
||||
+ printk_time, buffer);
|
||||
+ /* move the prefix forward to the beginning of the message text */
|
||||
+ text -= prefix_len;
|
||||
+ memmove(text, buffer, prefix_len);
|
||||
+ text_len += prefix_len;
|
||||
+
|
||||
+ text[text_len++] = '\n';
|
||||
+
|
||||
+ call_emergency_console_drivers(level, text, text_len);
|
||||
+
|
||||
+ touch_softlockup_watchdog_sync();
|
||||
+ clocksource_touch_watchdog();
|
||||
+ rcu_cpu_stall_reset();
|
||||
+ touch_nmi_watchdog();
|
||||
+
|
||||
+ printk_delay(level);
|
||||
+}
|
||||
#endif
|
||||
|
||||
void console_atomic_lock(unsigned int *flags)
|
||||
--- a/lib/Kconfig.debug
|
||||
+++ b/lib/Kconfig.debug
|
||||
@@ -61,6 +61,23 @@ config CONSOLE_LOGLEVEL_QUIET
|
||||
will be used as the loglevel. IOW passing "quiet" will be the
|
||||
equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>"
|
||||
|
||||
+config CONSOLE_LOGLEVEL_EMERGENCY
|
||||
+ int "Emergency console loglevel (1-15)"
|
||||
+ range 1 15
|
||||
+ default "5"
|
||||
+ help
|
||||
+ The loglevel to determine if a console message is an emergency
|
||||
+ message.
|
||||
+
|
||||
+ If supported by the console driver, emergency messages will be
|
||||
+ flushed to the console immediately. This can cause significant system
|
||||
+ latencies so the value should be set such that only significant
|
||||
+ messages are classified as emergency messages.
|
||||
+
|
||||
+ Setting a default here is equivalent to passing in
|
||||
+ emergency_loglevel=<x> in the kernel bootargs. emergency_loglevel=<x>
|
||||
+ continues to override whatever value is specified here as well.
|
||||
+
|
||||
config MESSAGE_LOGLEVEL_DEFAULT
|
||||
int "Default message log level (1-7)"
|
||||
range 1 7
|
@ -0,0 +1,484 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:58 +0100
Subject: [PATCH 20/25] serial: 8250: implement write_atomic

Implement a non-sleeping NMI-safe write_atomic console function in
order to support emergency printk messages.

Since interrupts need to be disabled during transmit, all usage of
the IER register was wrapped with access functions that use the
console_atomic_lock function to synchronize register access while
tracking the state of the interrupts. This was necessary because
write_atomic can be called from an NMI context that has preempted
write_atomic.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/tty/serial/8250/8250.h | 22 +++++
 drivers/tty/serial/8250/8250_core.c | 19 +++-
 drivers/tty/serial/8250/8250_dma.c | 4
 drivers/tty/serial/8250/8250_port.c | 154 ++++++++++++++++++++++++++----------
 include/linux/serial_8250.h | 5 +
 5 files changed, 157 insertions(+), 47 deletions(-)
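
A minimal sketch of the resulting atomic path (condensed from
serial8250_console_write_atomic() further down): interrupts are masked through
the refcounted clear_ier()/restore_ier() wrappers while the console cpu lock is
held, the characters are emitted by polling, and IER is rewritten only at the
outermost nesting level:

    unsigned int flags;

    console_atomic_lock(&flags);
    clear_ier(up);                          /* mask IER, refcounted */
    uart_console_write(&up->port, s, count,
                       serial8250_console_putchar_locked);
    wait_for_xmitr(up, BOTH_EMPTY);
    restore_ier(up);                        /* restored only when outermost */
    console_atomic_unlock(flags);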
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250.h
|
||||
+++ b/drivers/tty/serial/8250/8250.h
|
||||
@@ -96,6 +96,10 @@ struct serial8250_config {
|
||||
#define SERIAL8250_SHARE_IRQS 0
|
||||
#endif
|
||||
|
||||
+void set_ier(struct uart_8250_port *up, unsigned char ier);
|
||||
+void clear_ier(struct uart_8250_port *up);
|
||||
+void restore_ier(struct uart_8250_port *up);
|
||||
+
|
||||
#define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \
|
||||
{ \
|
||||
.iobase = _base, \
|
||||
@@ -139,6 +143,15 @@ static inline bool serial8250_set_THRI(s
|
||||
return true;
|
||||
}
|
||||
|
||||
+static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ if (up->ier & UART_IER_THRI)
|
||||
+ return false;
|
||||
+ up->ier |= UART_IER_THRI;
|
||||
+ set_ier(up, up->ier);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
|
||||
{
|
||||
if (!(up->ier & UART_IER_THRI))
|
||||
@@ -148,6 +161,15 @@ static inline bool serial8250_clear_THRI
|
||||
return true;
|
||||
}
|
||||
|
||||
+static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ if (!(up->ier & UART_IER_THRI))
|
||||
+ return false;
|
||||
+ up->ier &= ~UART_IER_THRI;
|
||||
+ set_ier(up, up->ier);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
struct uart_8250_port *serial8250_get_port(int line);
|
||||
|
||||
void serial8250_rpm_get(struct uart_8250_port *p);
|
||||
--- a/drivers/tty/serial/8250/8250_core.c
|
||||
+++ b/drivers/tty/serial/8250/8250_core.c
|
||||
@@ -265,7 +265,7 @@ static void serial8250_timeout(struct ti
|
||||
static void serial8250_backup_timeout(struct timer_list *t)
|
||||
{
|
||||
struct uart_8250_port *up = from_timer(up, t, timer);
|
||||
- unsigned int iir, ier = 0, lsr;
|
||||
+ unsigned int iir, lsr;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&up->port.lock, flags);
|
||||
@@ -274,10 +274,8 @@ static void serial8250_backup_timeout(st
|
||||
* Must disable interrupts or else we risk racing with the interrupt
|
||||
* based handler.
|
||||
*/
|
||||
- if (up->port.irq) {
|
||||
- ier = serial_in(up, UART_IER);
|
||||
- serial_out(up, UART_IER, 0);
|
||||
- }
|
||||
+ if (up->port.irq)
|
||||
+ clear_ier(up);
|
||||
|
||||
iir = serial_in(up, UART_IIR);
|
||||
|
||||
@@ -300,7 +298,7 @@ static void serial8250_backup_timeout(st
|
||||
serial8250_tx_chars(up);
|
||||
|
||||
if (up->port.irq)
|
||||
- serial_out(up, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
|
||||
spin_unlock_irqrestore(&up->port.lock, flags);
|
||||
|
||||
@@ -578,6 +576,14 @@ serial8250_register_ports(struct uart_dr
|
||||
|
||||
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
||||
|
||||
+static void univ8250_console_write_atomic(struct console *co, const char *s,
|
||||
+ unsigned int count)
|
||||
+{
|
||||
+ struct uart_8250_port *up = &serial8250_ports[co->index];
|
||||
+
|
||||
+ serial8250_console_write_atomic(up, s, count);
|
||||
+}
|
||||
+
|
||||
static void univ8250_console_write(struct console *co, const char *s,
|
||||
unsigned int count)
|
||||
{
|
||||
@@ -663,6 +669,7 @@ static int univ8250_console_match(struct
|
||||
|
||||
static struct console univ8250_console = {
|
||||
.name = "ttyS",
|
||||
+ .write_atomic = univ8250_console_write_atomic,
|
||||
.write = univ8250_console_write,
|
||||
.device = uart_console_device,
|
||||
.setup = univ8250_console_setup,
|
||||
--- a/drivers/tty/serial/8250/8250_dma.c
|
||||
+++ b/drivers/tty/serial/8250/8250_dma.c
|
||||
@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para
|
||||
|
||||
ret = serial8250_tx_dma(p);
|
||||
if (ret)
|
||||
- serial8250_set_THRI(p);
|
||||
+ serial8250_set_THRI_sier(p);
|
||||
|
||||
spin_unlock_irqrestore(&p->port.lock, flags);
|
||||
}
|
||||
@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p
|
||||
dma_async_issue_pending(dma->txchan);
|
||||
if (dma->tx_err) {
|
||||
dma->tx_err = 0;
|
||||
- serial8250_clear_THRI(p);
|
||||
+ serial8250_clear_THRI_sier(p);
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
--- a/drivers/tty/serial/8250/8250_port.c
|
||||
+++ b/drivers/tty/serial/8250/8250_port.c
|
||||
@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct
|
||||
serial_out(p, UART_EFR, UART_EFR_ECB);
|
||||
serial_out(p, UART_LCR, 0);
|
||||
}
|
||||
- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
|
||||
+ set_ier(p, sleep ? UART_IERX_SLEEP : 0);
|
||||
if (p->capabilities & UART_CAP_EFR) {
|
||||
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
|
||||
serial_out(p, UART_EFR, efr);
|
||||
@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua
|
||||
|
||||
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
||||
up->port.read_status_mask &= ~UART_LSR_DR;
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua
|
||||
serial8250_clear_and_reinit_fifos(p);
|
||||
|
||||
p->ier |= UART_IER_RLSI | UART_IER_RDI;
|
||||
- serial_port_out(&p->port, UART_IER, p->ier);
|
||||
+ set_ier(p, p->ier);
|
||||
}
|
||||
}
|
||||
static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t)
|
||||
@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_
|
||||
|
||||
static inline void __do_stop_tx(struct uart_8250_port *p)
|
||||
{
|
||||
- if (serial8250_clear_THRI(p))
|
||||
+ if (serial8250_clear_THRI_sier(p))
|
||||
serial8250_rpm_put_tx(p);
|
||||
}
|
||||
|
||||
@@ -1509,7 +1509,7 @@ static inline void __start_tx(struct uar
|
||||
if (up->dma && !up->dma->tx_dma(up))
|
||||
return;
|
||||
|
||||
- if (serial8250_set_THRI(up)) {
|
||||
+ if (serial8250_set_THRI_sier(up)) {
|
||||
if (up->bugs & UART_BUG_TXEN) {
|
||||
unsigned char lsr;
|
||||
|
||||
@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct
|
||||
mctrl_gpio_disable_ms(up->gpios);
|
||||
|
||||
up->ier &= ~UART_IER_MSI;
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
}
|
||||
|
||||
static void serial8250_enable_ms(struct uart_port *port)
|
||||
@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct
|
||||
up->ier |= UART_IER_MSI;
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -1991,6 +1991,52 @@ static void wait_for_xmitr(struct uart_8
|
||||
}
|
||||
}
|
||||
|
||||
+static atomic_t ier_counter = ATOMIC_INIT(0);
|
||||
+static atomic_t ier_value = ATOMIC_INIT(0);
|
||||
+
|
||||
+void set_ier(struct uart_8250_port *up, unsigned char ier)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ if (atomic_read(&ier_counter) > 0)
|
||||
+ atomic_set(&ier_value, ier);
|
||||
+ else
|
||||
+ serial_port_out(port, UART_IER, ier);
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
+void clear_ier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int ier_cleared = 0;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ atomic_inc(&ier_counter);
|
||||
+ ier = serial_port_in(port, UART_IER);
|
||||
+ if (up->capabilities & UART_CAP_UUE)
|
||||
+ ier_cleared = UART_IER_UUE;
|
||||
+ if (ier != ier_cleared) {
|
||||
+ serial_port_out(port, UART_IER, ier_cleared);
|
||||
+ atomic_set(&ier_value, ier);
|
||||
+ }
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
+void restore_ier(struct uart_8250_port *up)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ if (atomic_fetch_dec(&ier_counter) == 1)
|
||||
+ serial_port_out(port, UART_IER, atomic_read(&ier_value));
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
#ifdef CONFIG_CONSOLE_POLL
|
||||
/*
|
||||
* Console polling routines for writing and reading from the uart while
|
||||
@@ -2022,18 +2068,10 @@ static int serial8250_get_poll_char(stru
|
||||
static void serial8250_put_poll_char(struct uart_port *port,
|
||||
unsigned char c)
|
||||
{
|
||||
- unsigned int ier;
|
||||
struct uart_8250_port *up = up_to_u8250p(port);
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
- /*
|
||||
- * First save the IER then disable the interrupts
|
||||
- */
|
||||
- ier = serial_port_in(port, UART_IER);
|
||||
- if (up->capabilities & UART_CAP_UUE)
|
||||
- serial_port_out(port, UART_IER, UART_IER_UUE);
|
||||
- else
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ clear_ier(up);
|
||||
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
/*
|
||||
@@ -2046,7 +2084,7 @@ static void serial8250_put_poll_char(str
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- serial_port_out(port, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -2358,7 +2396,7 @@ void serial8250_do_shutdown(struct uart_
|
||||
*/
|
||||
spin_lock_irqsave(&port->lock, flags);
|
||||
up->ier = 0;
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ set_ier(up, 0);
|
||||
spin_unlock_irqrestore(&port->lock, flags);
|
||||
|
||||
synchronize_irq(port->irq);
|
||||
@@ -2643,7 +2681,7 @@ serial8250_do_set_termios(struct uart_po
|
||||
if (up->capabilities & UART_CAP_RTOIE)
|
||||
up->ier |= UART_IER_RTOIE;
|
||||
|
||||
- serial_port_out(port, UART_IER, up->ier);
|
||||
+ set_ier(up, up->ier);
|
||||
|
||||
if (up->capabilities & UART_CAP_EFR) {
|
||||
unsigned char efr = 0;
|
||||
@@ -3107,7 +3145,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default
|
||||
|
||||
#ifdef CONFIG_SERIAL_8250_CONSOLE
|
||||
|
||||
-static void serial8250_console_putchar(struct uart_port *port, int ch)
|
||||
+static void serial8250_console_putchar_locked(struct uart_port *port, int ch)
|
||||
{
|
||||
struct uart_8250_port *up = up_to_u8250p(port);
|
||||
|
||||
@@ -3115,6 +3153,18 @@ static void serial8250_console_putchar(s
|
||||
serial_port_out(port, UART_TX, ch);
|
||||
}
|
||||
|
||||
+static void serial8250_console_putchar(struct uart_port *port, int ch)
|
||||
+{
|
||||
+ struct uart_8250_port *up = up_to_u8250p(port);
|
||||
+ unsigned int flags;
|
||||
+
|
||||
+ wait_for_xmitr(up, UART_LSR_THRE);
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+ serial8250_console_putchar_locked(port, ch);
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Restore serial console when h/w power-off detected
|
||||
*/
|
||||
@@ -3136,6 +3186,42 @@ static void serial8250_console_restore(s
|
||||
serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
|
||||
}
|
||||
|
||||
+void serial8250_console_write_atomic(struct uart_8250_port *up,
|
||||
+ const char *s, unsigned int count)
|
||||
+{
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ bool locked;
|
||||
+
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ /*
|
||||
+ * If possible, keep any other CPUs from working with the
|
||||
+ * UART until the atomic message is completed. This helps
|
||||
+ * to keep the output more orderly.
|
||||
+ */
|
||||
+ locked = spin_trylock(&port->lock);
|
||||
+
|
||||
+ touch_nmi_watchdog();
|
||||
+
|
||||
+ clear_ier(up);
|
||||
+
|
||||
+ if (atomic_fetch_inc(&up->console_printing)) {
|
||||
+ uart_console_write(port, "\n", 1,
|
||||
+ serial8250_console_putchar_locked);
|
||||
+ }
|
||||
+ uart_console_write(port, s, count, serial8250_console_putchar_locked);
|
||||
+ atomic_dec(&up->console_printing);
|
||||
+
|
||||
+ wait_for_xmitr(up, BOTH_EMPTY);
|
||||
+ restore_ier(up);
|
||||
+
|
||||
+ if (locked)
|
||||
+ spin_unlock(&port->lock);
|
||||
+
|
||||
+ console_atomic_unlock(flags);
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Print a string to the serial port trying not to disturb
|
||||
* any possible real use of the port...
|
||||
@@ -3147,27 +3233,13 @@ void serial8250_console_write(struct uar
|
||||
{
|
||||
struct uart_port *port = &up->port;
|
||||
unsigned long flags;
|
||||
- unsigned int ier;
|
||||
- int locked = 1;
|
||||
|
||||
touch_nmi_watchdog();
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
+ spin_lock_irqsave(&port->lock, flags);
|
||||
|
||||
- if (oops_in_progress)
|
||||
- locked = spin_trylock_irqsave(&port->lock, flags);
|
||||
- else
|
||||
- spin_lock_irqsave(&port->lock, flags);
|
||||
-
|
||||
- /*
|
||||
- * First save the IER then disable the interrupts
|
||||
- */
|
||||
- ier = serial_port_in(port, UART_IER);
|
||||
-
|
||||
- if (up->capabilities & UART_CAP_UUE)
|
||||
- serial_port_out(port, UART_IER, UART_IER_UUE);
|
||||
- else
|
||||
- serial_port_out(port, UART_IER, 0);
|
||||
+ clear_ier(up);
|
||||
|
||||
/* check scratch reg to see if port powered off during system sleep */
|
||||
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
|
||||
@@ -3175,14 +3247,16 @@ void serial8250_console_write(struct uar
|
||||
up->canary = 0;
|
||||
}
|
||||
|
||||
+ atomic_inc(&up->console_printing);
|
||||
uart_console_write(port, s, count, serial8250_console_putchar);
|
||||
+ atomic_dec(&up->console_printing);
|
||||
|
||||
/*
|
||||
* Finally, wait for transmitter to become empty
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- serial_port_out(port, UART_IER, ier);
|
||||
+ restore_ier(up);
|
||||
|
||||
/*
|
||||
* The receive handling will happen properly because the
|
||||
@@ -3194,8 +3268,7 @@ void serial8250_console_write(struct uar
|
||||
if (up->msr_saved_flags)
|
||||
serial8250_modem_status(up);
|
||||
|
||||
- if (locked)
|
||||
- spin_unlock_irqrestore(&port->lock, flags);
|
||||
+ spin_unlock_irqrestore(&port->lock, flags);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -3216,6 +3289,7 @@ static unsigned int probe_baud(struct ua
|
||||
|
||||
int serial8250_console_setup(struct uart_port *port, char *options, bool probe)
|
||||
{
|
||||
+ struct uart_8250_port *up = up_to_u8250p(port);
|
||||
int baud = 9600;
|
||||
int bits = 8;
|
||||
int parity = 'n';
|
||||
@@ -3224,6 +3298,8 @@ int serial8250_console_setup(struct uart
|
||||
if (!port->iobase && !port->membase)
|
||||
return -ENODEV;
|
||||
|
||||
+ atomic_set(&up->console_printing, 0);
|
||||
+
|
||||
if (options)
|
||||
uart_parse_options(options, &baud, &parity, &bits, &flow);
|
||||
else if (probe)
|
||||
--- a/include/linux/serial_8250.h
|
||||
+++ b/include/linux/serial_8250.h
|
||||
@@ -7,6 +7,7 @@
|
||||
#ifndef _LINUX_SERIAL_8250_H
|
||||
#define _LINUX_SERIAL_8250_H
|
||||
|
||||
+#include <linux/atomic.h>
|
||||
#include <linux/serial_core.h>
|
||||
#include <linux/serial_reg.h>
|
||||
#include <linux/platform_device.h>
|
||||
@@ -123,6 +124,8 @@ struct uart_8250_port {
|
||||
#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA
|
||||
unsigned char msr_saved_flags;
|
||||
|
||||
+ atomic_t console_printing;
|
||||
+
|
||||
struct uart_8250_dma *dma;
|
||||
const struct uart_8250_ops *ops;
|
||||
|
||||
@@ -174,6 +177,8 @@ void serial8250_init_port(struct uart_82
|
||||
void serial8250_set_defaults(struct uart_8250_port *up);
|
||||
void serial8250_console_write(struct uart_8250_port *up, const char *s,
|
||||
unsigned int count);
|
||||
+void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s,
|
||||
+ unsigned int count);
|
||||
int serial8250_console_setup(struct uart_port *port, char *options, bool probe);
|
||||
|
||||
extern void serial8250_set_isa_configurator(void (*v)
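The hunks above funnel every UART_IER update through set_ier(), clear_ier() and restore_ier() so that an atomic console writer can mask the transmitter interrupt even when it nests inside the regular console path. The stand-alone C sketch below only models that counter-based save/restore idea; the reg_read()/reg_write() stubs, the plain integers and the missing console_atomic_lock() serialization are simplifications for illustration and are not taken from the patch.

#include <stdatomic.h>
#include <stdio.h>

/*
 * User-space model of the IER bookkeeping added by the patch above:
 * while at least one clear_ier() is outstanding, set_ier() only records
 * the wanted value, and the final restore_ier() writes it back to the
 * stubbed register.  In the kernel this runs under console_atomic_lock().
 */
static atomic_int ier_counter;      /* nesting depth of clear_ier() */
static atomic_int ier_value;        /* value to restore when depth returns to 0 */
static int hw_ier;                  /* stand-in for the UART_IER register */

static void reg_write(int v) { hw_ier = v; }
static int  reg_read(void)   { return hw_ier; }

static void set_ier(int ier)
{
	if (atomic_load(&ier_counter) > 0)
		atomic_store(&ier_value, ier);       /* defer while masked */
	else
		reg_write(ier);                      /* no atomic writer active */
}

static void clear_ier(void)
{
	int cur;

	atomic_fetch_add(&ier_counter, 1);
	cur = reg_read();
	if (cur != 0) {
		reg_write(0);                        /* mask the UART interrupt */
		atomic_store(&ier_value, cur);       /* remember what to restore */
	}
}

static void restore_ier(void)
{
	if (atomic_fetch_sub(&ier_counter, 1) == 1)
		reg_write(atomic_load(&ier_value));  /* last unmask restores IER */
}

int main(void)
{
	set_ier(0x0f);     /* normal console path programs IER directly */
	clear_ier();       /* atomic console write starts, IER is masked */
	set_ier(0x05);     /* concurrent update is deferred, not lost */
	restore_ier();     /* deferred value finally reaches the register */
	printf("IER = 0x%02x\n", reg_read());        /* prints IER = 0x05 */
	return 0;
}

In the driver, the same pairing is what lets serial8250_console_write_atomic() run nested inside serial8250_console_write() without either path losing the IER value the other programmed.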
@@ -0,0 +1,132 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:29:59 +0100
Subject: [PATCH 21/25] printk: implement KERN_CONT

Implement KERN_CONT based on the printing CPU rather than on the
printing task. As long as the KERN_CONT messages are coming from the
same CPU and no non-KERN_CONT messages come, the messages are assumed
to belong to each other.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 65 +++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 28 deletions(-)

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1709,8 +1709,6 @@ static inline u32 printk_caller_id(void)
|
||||
0x80000000 + raw_smp_processor_id();
|
||||
}
|
||||
|
||||
-/* FIXME: no support for LOG_CONT */
|
||||
-#if 0
|
||||
/*
|
||||
* Continuation lines are buffered, and not committed to the record buffer
|
||||
* until the line is complete, or a race forces it. The line fragments
|
||||
@@ -1721,52 +1719,55 @@ static struct cont {
|
||||
char buf[LOG_LINE_MAX];
|
||||
size_t len; /* length == 0 means unused buffer */
|
||||
u32 caller_id; /* printk_caller_id() of first print */
|
||||
+ int cpu_owner; /* cpu of first print */
|
||||
u64 ts_nsec; /* time of first print */
|
||||
u8 level; /* log level of first message */
|
||||
u8 facility; /* log facility of first message */
|
||||
enum log_flags flags; /* prefix, newline flags */
|
||||
-} cont;
|
||||
+} cont[2];
|
||||
|
||||
-static void cont_flush(void)
|
||||
+static void cont_flush(int ctx)
|
||||
{
|
||||
- if (cont.len == 0)
|
||||
+ struct cont *c = &cont[ctx];
|
||||
+
|
||||
+ if (c->len == 0)
|
||||
return;
|
||||
|
||||
- log_store(cont.caller_id, cont.facility, cont.level, cont.flags,
|
||||
- cont.ts_nsec, NULL, 0, cont.buf, cont.len);
|
||||
- cont.len = 0;
|
||||
+ log_store(c->caller_id, c->facility, c->level, c->flags,
|
||||
+ c->ts_nsec, c->cpu_owner, NULL, 0, c->buf, c->len);
|
||||
+ c->len = 0;
|
||||
}
|
||||
|
||||
-static bool cont_add(u32 caller_id, int facility, int level,
|
||||
+static void cont_add(int ctx, int cpu, u32 caller_id, int facility, int level,
|
||||
enum log_flags flags, const char *text, size_t len)
|
||||
{
|
||||
+ struct cont *c = &cont[ctx];
|
||||
+
|
||||
+ if (cpu != c->cpu_owner || !(flags & LOG_CONT))
|
||||
+ cont_flush(ctx);
|
||||
+
|
||||
/* If the line gets too long, split it up in separate records. */
|
||||
- if (cont.len + len > sizeof(cont.buf)) {
|
||||
- cont_flush();
|
||||
- return false;
|
||||
- }
|
||||
+ while (c->len + len > sizeof(c->buf))
|
||||
+ cont_flush(ctx);
|
||||
|
||||
- if (!cont.len) {
|
||||
- cont.facility = facility;
|
||||
- cont.level = level;
|
||||
- cont.caller_id = caller_id;
|
||||
- cont.ts_nsec = local_clock();
|
||||
- cont.flags = flags;
|
||||
+ if (!c->len) {
|
||||
+ c->facility = facility;
|
||||
+ c->level = level;
|
||||
+ c->caller_id = caller_id;
|
||||
+ c->ts_nsec = local_clock();
|
||||
+ c->flags = flags;
|
||||
+ c->cpu_owner = cpu;
|
||||
}
|
||||
|
||||
- memcpy(cont.buf + cont.len, text, len);
|
||||
- cont.len += len;
|
||||
+ memcpy(c->buf + c->len, text, len);
|
||||
+ c->len += len;
|
||||
|
||||
// The original flags come from the first line,
|
||||
// but later continuations can add a newline.
|
||||
if (flags & LOG_NEWLINE) {
|
||||
- cont.flags |= LOG_NEWLINE;
|
||||
- cont_flush();
|
||||
+ c->flags |= LOG_NEWLINE;
|
||||
}
|
||||
-
|
||||
- return true;
|
||||
}
|
||||
-#endif /* 0 */
|
||||
|
||||
/* ring buffer used as memory allocator for temporary sprint buffers */
|
||||
DECLARE_STATIC_PRINTKRB(sprint_rb,
|
||||
@@ -1778,6 +1779,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
const char *fmt, va_list args)
|
||||
{
|
||||
const u32 caller_id = printk_caller_id();
|
||||
+ int ctx = !!in_nmi();
|
||||
enum log_flags lflags = 0;
|
||||
int printed_len = 0;
|
||||
struct prb_handle h;
|
||||
@@ -1843,8 +1845,15 @@ asmlinkage int vprintk_emit(int facility
|
||||
*/
|
||||
printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len);
|
||||
|
||||
- printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu,
|
||||
- dict, dictlen, text, text_len);
|
||||
+ if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) {
|
||||
+ cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len);
|
||||
+ printed_len = text_len;
|
||||
+ } else {
|
||||
+ if (cpu == cont[ctx].cpu_owner)
|
||||
+ cont_flush(ctx);
|
||||
+ printed_len = log_store(caller_id, facility, level, lflags, ts_nsec, cpu,
|
||||
+ dict, dictlen, text, text_len);
|
||||
+ }
|
||||
|
||||
prb_commit(&h);
|
||||
return printed_len;
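The per-CPU continuation handling added above can be summarised with a small user-space model: one cont buffer per printing context, flushed whenever a different CPU takes over or a non-continuation message arrives. Everything below (the buffer size, printf() standing in for log_store(), an is_cont flag replacing LOG_CONT) is an illustrative simplification rather than code from the patch.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define CONT_BUF 128    /* illustrative size; the patch uses LOG_LINE_MAX */

/*
 * Model of the continuation bookkeeping above: one buffer per printing
 * context (0 = normal, 1 = NMI), owned by the CPU that started the line
 * and flushed when ownership changes or the line is complete.
 */
struct cont {
	char buf[CONT_BUF];
	size_t len;
	int cpu_owner;
};
static struct cont cont[2];

static void cont_flush(int ctx)
{
	struct cont *c = &cont[ctx];

	if (!c->len)
		return;
	/* printf() stands in for log_store() */
	printf("cpu%d: %.*s\n", c->cpu_owner, (int)c->len, c->buf);
	c->len = 0;
}

/* fragments are assumed shorter than CONT_BUF, as LOG_LINE_MAX guarantees */
static void cont_add(int ctx, int cpu, const char *text, size_t len, bool is_cont)
{
	struct cont *c = &cont[ctx];

	/* a different CPU or a non-continuation message ends the open line */
	if (cpu != c->cpu_owner || !is_cont)
		cont_flush(ctx);

	while (c->len + len > sizeof(c->buf))    /* split overlong lines */
		cont_flush(ctx);

	if (!c->len)
		c->cpu_owner = cpu;
	memcpy(c->buf + c->len, text, len);
	c->len += len;
}

int main(void)
{
	cont_add(0, 1, "probing ", 8, false);
	cont_add(0, 1, "device 3", 8, true);     /* same CPU: appended */
	cont_add(0, 2, "unrelated", 9, false);   /* other CPU: flushes cpu1 first */
	cont_flush(0);
	return 0;
}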
@@ -0,0 +1,304 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:30:00 +0100
Subject: [PATCH 22/25] printk: implement /dev/kmsg

Since printk messages are now logged to a new ring buffer, update
the /dev/kmsg functions to pull the messages from there.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/proc/kmsg.c         |   4 -
 include/linux/printk.h |   1
 kernel/printk/printk.c | 162 +++++++++++++++++++++++++++++++++----------------
 3 files changed, 113 insertions(+), 54 deletions(-)

--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -18,8 +18,6 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
-extern wait_queue_head_t log_wait;
|
||||
-
|
||||
static int kmsg_open(struct inode * inode, struct file * file)
|
||||
{
|
||||
return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC);
|
||||
@@ -42,7 +40,7 @@ static ssize_t kmsg_read(struct file *fi
|
||||
|
||||
static __poll_t kmsg_poll(struct file *file, poll_table *wait)
|
||||
{
|
||||
- poll_wait(file, &log_wait, wait);
|
||||
+ poll_wait(file, printk_wait_queue(), wait);
|
||||
if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
|
||||
return EPOLLIN | EPOLLRDNORM;
|
||||
return 0;
|
||||
--- a/include/linux/printk.h
|
||||
+++ b/include/linux/printk.h
|
||||
@@ -192,6 +192,7 @@ void __init setup_log_buf(int early);
|
||||
void dump_stack_print_info(const char *log_lvl);
|
||||
void show_regs_print_info(const char *log_lvl);
|
||||
extern asmlinkage void dump_stack(void) __cold;
|
||||
+struct wait_queue_head *printk_wait_queue(void);
|
||||
#else
|
||||
static inline __printf(1, 0)
|
||||
int vprintk(const char *s, va_list args)
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -673,10 +673,11 @@ static ssize_t msg_print_ext_body(char *
|
||||
/* /dev/kmsg - userspace message inject/listen interface */
|
||||
struct devkmsg_user {
|
||||
u64 seq;
|
||||
- u32 idx;
|
||||
+ struct prb_iterator iter;
|
||||
struct ratelimit_state rs;
|
||||
struct mutex lock;
|
||||
char buf[CONSOLE_EXT_LOG_MAX];
|
||||
+ char msgbuf[PRINTK_RECORD_MAX];
|
||||
};
|
||||
|
||||
static __printf(3, 4) __cold
|
||||
@@ -759,9 +760,11 @@ static ssize_t devkmsg_read(struct file
|
||||
size_t count, loff_t *ppos)
|
||||
{
|
||||
struct devkmsg_user *user = file->private_data;
|
||||
+ struct prb_iterator backup_iter;
|
||||
struct printk_log *msg;
|
||||
- size_t len;
|
||||
ssize_t ret;
|
||||
+ size_t len;
|
||||
+ u64 seq;
|
||||
|
||||
if (!user)
|
||||
return -EBADF;
|
||||
@@ -770,52 +773,67 @@ static ssize_t devkmsg_read(struct file
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
- while (user->seq == log_next_seq) {
|
||||
- if (file->f_flags & O_NONBLOCK) {
|
||||
- ret = -EAGAIN;
|
||||
- logbuf_unlock_irq();
|
||||
- goto out;
|
||||
- }
|
||||
+ /* make a backup copy in case there is a problem */
|
||||
+ prb_iter_copy(&backup_iter, &user->iter);
|
||||
|
||||
- logbuf_unlock_irq();
|
||||
- ret = wait_event_interruptible(log_wait,
|
||||
- user->seq != log_next_seq);
|
||||
- if (ret)
|
||||
- goto out;
|
||||
- logbuf_lock_irq();
|
||||
+ if (file->f_flags & O_NONBLOCK) {
|
||||
+ ret = prb_iter_next(&user->iter, &user->msgbuf[0],
|
||||
+ sizeof(user->msgbuf), &seq);
|
||||
+ } else {
|
||||
+ ret = prb_iter_wait_next(&user->iter, &user->msgbuf[0],
|
||||
+ sizeof(user->msgbuf), &seq);
|
||||
}
|
||||
-
|
||||
- if (user->seq < log_first_seq) {
|
||||
- /* our last seen message is gone, return error and reset */
|
||||
- user->idx = log_first_idx;
|
||||
- user->seq = log_first_seq;
|
||||
+ if (ret == 0) {
|
||||
+ /* end of list */
|
||||
+ ret = -EAGAIN;
|
||||
+ goto out;
|
||||
+ } else if (ret == -EINVAL) {
|
||||
+ /* iterator invalid, return error and reset */
|
||||
ret = -EPIPE;
|
||||
- logbuf_unlock_irq();
|
||||
+ prb_iter_init(&user->iter, &printk_rb, &user->seq);
|
||||
+ goto out;
|
||||
+ } else if (ret < 0) {
|
||||
+ /* interrupted by signal */
|
||||
goto out;
|
||||
}
|
||||
|
||||
- msg = log_from_idx(user->idx);
|
||||
+ if (user->seq == 0) {
|
||||
+ user->seq = seq;
|
||||
+ } else {
|
||||
+ user->seq++;
|
||||
+ if (user->seq < seq) {
|
||||
+ ret = -EPIPE;
|
||||
+ goto restore_out;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ msg = (struct printk_log *)&user->msgbuf[0];
|
||||
len = msg_print_ext_header(user->buf, sizeof(user->buf),
|
||||
msg, user->seq);
|
||||
len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
|
||||
log_dict(msg), msg->dict_len,
|
||||
log_text(msg), msg->text_len);
|
||||
|
||||
- user->idx = log_next(user->idx);
|
||||
- user->seq++;
|
||||
- logbuf_unlock_irq();
|
||||
-
|
||||
if (len > count) {
|
||||
ret = -EINVAL;
|
||||
- goto out;
|
||||
+ goto restore_out;
|
||||
}
|
||||
|
||||
if (copy_to_user(buf, user->buf, len)) {
|
||||
ret = -EFAULT;
|
||||
- goto out;
|
||||
+ goto restore_out;
|
||||
}
|
||||
+
|
||||
ret = len;
|
||||
+ goto out;
|
||||
+restore_out:
|
||||
+ /*
|
||||
+ * There was an error, but this message should not be
|
||||
+ * lost because of it. Restore the backup and setup
|
||||
+ * seq so that it will work with the next read.
|
||||
+ */
|
||||
+ prb_iter_copy(&user->iter, &backup_iter);
|
||||
+ user->seq = seq - 1;
|
||||
out:
|
||||
mutex_unlock(&user->lock);
|
||||
return ret;
|
||||
@@ -824,19 +842,21 @@ static ssize_t devkmsg_read(struct file
|
||||
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
struct devkmsg_user *user = file->private_data;
|
||||
- loff_t ret = 0;
|
||||
+ loff_t ret;
|
||||
|
||||
if (!user)
|
||||
return -EBADF;
|
||||
if (offset)
|
||||
return -ESPIPE;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
+ ret = mutex_lock_interruptible(&user->lock);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
/* the first record */
|
||||
- user->idx = log_first_idx;
|
||||
- user->seq = log_first_seq;
|
||||
+ prb_iter_init(&user->iter, &printk_rb, &user->seq);
|
||||
break;
|
||||
case SEEK_DATA:
|
||||
/*
|
||||
@@ -844,40 +864,83 @@ static loff_t devkmsg_llseek(struct file
|
||||
* like issued by 'dmesg -c'. Reading /dev/kmsg itself
|
||||
* changes no global state, and does not clear anything.
|
||||
*/
|
||||
- user->idx = clear_idx;
|
||||
- user->seq = clear_seq;
|
||||
+ for (;;) {
|
||||
+ prb_iter_init(&user->iter, &printk_rb, NULL);
|
||||
+ ret = prb_iter_seek(&user->iter, clear_seq);
|
||||
+ if (ret > 0) {
|
||||
+ /* seeked to clear seq */
|
||||
+ user->seq = clear_seq;
|
||||
+ break;
|
||||
+ } else if (ret == 0) {
|
||||
+ /*
|
||||
+ * The end of the list was hit without
|
||||
+ * ever seeing the clear seq. Just
|
||||
+ * seek to the beginning of the list.
|
||||
+ */
|
||||
+ prb_iter_init(&user->iter, &printk_rb,
|
||||
+ &user->seq);
|
||||
+ break;
|
||||
+ }
|
||||
+ /* iterator invalid, start over */
|
||||
+ }
|
||||
+ ret = 0;
|
||||
break;
|
||||
case SEEK_END:
|
||||
/* after the last record */
|
||||
- user->idx = log_next_idx;
|
||||
- user->seq = log_next_seq;
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_next(&user->iter, NULL, 0, &user->seq);
|
||||
+ if (ret == 0)
|
||||
+ break;
|
||||
+ else if (ret > 0)
|
||||
+ continue;
|
||||
+ /* iterator invalid, start over */
|
||||
+ prb_iter_init(&user->iter, &printk_rb, &user->seq);
|
||||
+ }
|
||||
+ ret = 0;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
}
|
||||
- logbuf_unlock_irq();
|
||||
+
|
||||
+ mutex_unlock(&user->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
+struct wait_queue_head *printk_wait_queue(void)
|
||||
+{
|
||||
+ /* FIXME: using prb internals! */
|
||||
+ return printk_rb.wq;
|
||||
+}
|
||||
+
|
||||
static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
|
||||
{
|
||||
struct devkmsg_user *user = file->private_data;
|
||||
+ struct prb_iterator iter;
|
||||
__poll_t ret = 0;
|
||||
+ int rbret;
|
||||
+ u64 seq;
|
||||
|
||||
if (!user)
|
||||
return EPOLLERR|EPOLLNVAL;
|
||||
|
||||
- poll_wait(file, &log_wait, wait);
|
||||
+ poll_wait(file, printk_wait_queue(), wait);
|
||||
|
||||
- logbuf_lock_irq();
|
||||
- if (user->seq < log_next_seq) {
|
||||
- /* return error when data has vanished underneath us */
|
||||
- if (user->seq < log_first_seq)
|
||||
- ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
|
||||
- else
|
||||
- ret = EPOLLIN|EPOLLRDNORM;
|
||||
- }
|
||||
- logbuf_unlock_irq();
|
||||
+ mutex_lock(&user->lock);
|
||||
+
|
||||
+ /* use copy so no actual iteration takes place */
|
||||
+ prb_iter_copy(&iter, &user->iter);
|
||||
+
|
||||
+ rbret = prb_iter_next(&iter, &user->msgbuf[0],
|
||||
+ sizeof(user->msgbuf), &seq);
|
||||
+ if (rbret == 0)
|
||||
+ goto out;
|
||||
+
|
||||
+ ret = EPOLLIN|EPOLLRDNORM;
|
||||
+
|
||||
+ if (rbret < 0 || (seq - user->seq) != 1)
|
||||
+ ret |= EPOLLERR|EPOLLPRI;
|
||||
+out:
|
||||
+ mutex_unlock(&user->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -907,10 +970,7 @@ static int devkmsg_open(struct inode *in
|
||||
|
||||
mutex_init(&user->lock);
|
||||
|
||||
- logbuf_lock_irq();
|
||||
- user->idx = log_first_idx;
|
||||
- user->seq = log_first_seq;
|
||||
- logbuf_unlock_irq();
|
||||
+ prb_iter_init(&user->iter, &printk_rb, &user->seq);
|
||||
|
||||
file->private_data = user;
|
||||
return 0;
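The /dev/kmsg rework above and the syslog and kmsg_dump patches that follow share one access pattern: readers keep only a sequence-based iterator, copy it before walking, and when the writer has overwritten their position they re-initialize from the oldest surviving record and try again. The toy ring below models just that retry loop; struct ring, iter_next() and the field names are invented stand-ins for illustration and are not the kernel prb_* API.

#include <stdio.h>

#define RING 4
struct ring {
	long first_seq;      /* oldest record still stored */
	long next_seq;       /* sequence number the next write will use */
	int  data[RING];
};
struct iter {
	long seq;            /* next record this reader wants */
};

/* 1 = record returned, 0 = end of log, -1 = position overwritten */
static int iter_next(const struct ring *rb, struct iter *it, int *out)
{
	if (it->seq < rb->first_seq)
		return -1;
	if (it->seq >= rb->next_seq)
		return 0;
	*out = rb->data[it->seq % RING];
	it->seq++;
	return 1;
}

static long count_remaining(const struct ring *rb, struct iter start)
{
	struct iter it = start;   /* work on a copy, as the patches do */
	long count = 0;
	int v, ret;

	for (;;) {
		ret = iter_next(rb, &it, &v);
		if (ret == 0)
			break;
		if (ret < 0) {        /* invalidated: restart from the head */
			it.seq = rb->first_seq;
			count = 0;
			continue;
		}
		count++;          /* v (the payload) is unused in this model */
	}
	return count;
}

int main(void)
{
	/* records 6, 7 and 8 are live; older ones were overwritten */
	struct ring rb = { .first_seq = 6, .next_seq = 9, .data = { 8, 0, 6, 7 } };
	struct iter stale = { .seq = 2 };    /* reader fell far behind */

	printf("%ld records readable\n", count_remaining(&rb, stale)); /* 3 */
	return 0;
}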
493
kernel/patches-5.4.x-rt/0040-0023-printk-implement-syslog.patch
Normal file
@@ -0,0 +1,493 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:30:01 +0100
Subject: [PATCH 23/25] printk: implement syslog

Since printk messages are now logged to a new ring buffer, update
the syslog functions to pull the messages from there.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/printk.c | 342 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 236 insertions(+), 106 deletions(-)

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -407,10 +407,12 @@ DECLARE_STATIC_PRINTKRB_CPULOCK(printk_c
|
||||
/* record buffer */
|
||||
DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock);
|
||||
|
||||
+static DEFINE_MUTEX(syslog_lock);
|
||||
+DECLARE_STATIC_PRINTKRB_ITER(syslog_iter, &printk_rb);
|
||||
+
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
static u64 syslog_seq;
|
||||
-static u32 syslog_idx;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
@@ -1303,30 +1305,42 @@ static size_t msg_print_text(const struc
|
||||
return len;
|
||||
}
|
||||
|
||||
-static int syslog_print(char __user *buf, int size)
|
||||
+static int syslog_print(char __user *buf, int size, char *text,
|
||||
+ char *msgbuf, int *locked)
|
||||
{
|
||||
- char *text;
|
||||
+ struct prb_iterator iter;
|
||||
struct printk_log *msg;
|
||||
int len = 0;
|
||||
-
|
||||
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
|
||||
- if (!text)
|
||||
- return -ENOMEM;
|
||||
+ u64 seq;
|
||||
+ int ret;
|
||||
|
||||
while (size > 0) {
|
||||
size_t n;
|
||||
size_t skip;
|
||||
|
||||
- logbuf_lock_irq();
|
||||
- if (syslog_seq < log_first_seq) {
|
||||
- /* messages are gone, move to first one */
|
||||
- syslog_seq = log_first_seq;
|
||||
- syslog_idx = log_first_idx;
|
||||
- syslog_partial = 0;
|
||||
+ for (;;) {
|
||||
+ prb_iter_copy(&iter, &syslog_iter);
|
||||
+ ret = prb_iter_next(&iter, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret < 0) {
|
||||
+ /* messages are gone, move to first one */
|
||||
+ prb_iter_init(&syslog_iter, &printk_rb,
|
||||
+ &syslog_seq);
|
||||
+ syslog_partial = 0;
|
||||
+ continue;
|
||||
+ }
|
||||
+ break;
|
||||
}
|
||||
- if (syslog_seq == log_next_seq) {
|
||||
- logbuf_unlock_irq();
|
||||
+ if (ret == 0)
|
||||
break;
|
||||
+
|
||||
+ /*
|
||||
+ * If messages have been missed, the partial tracker
|
||||
+ * is no longer valid and must be reset.
|
||||
+ */
|
||||
+ if (syslog_seq > 0 && seq - 1 != syslog_seq) {
|
||||
+ syslog_seq = seq - 1;
|
||||
+ syslog_partial = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1336,131 +1350,212 @@ static int syslog_print(char __user *buf
|
||||
if (!syslog_partial)
|
||||
syslog_time = printk_time;
|
||||
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
+
|
||||
skip = syslog_partial;
|
||||
- msg = log_from_idx(syslog_idx);
|
||||
n = msg_print_text(msg, true, syslog_time, text,
|
||||
- LOG_LINE_MAX + PREFIX_MAX);
|
||||
+ PRINTK_SPRINT_MAX);
|
||||
if (n - syslog_partial <= size) {
|
||||
/* message fits into buffer, move forward */
|
||||
- syslog_idx = log_next(syslog_idx);
|
||||
- syslog_seq++;
|
||||
+ prb_iter_next(&syslog_iter, NULL, 0, &syslog_seq);
|
||||
n -= syslog_partial;
|
||||
syslog_partial = 0;
|
||||
- } else if (!len){
|
||||
+ } else if (!len) {
|
||||
/* partial read(), remember position */
|
||||
n = size;
|
||||
syslog_partial += n;
|
||||
} else
|
||||
n = 0;
|
||||
- logbuf_unlock_irq();
|
||||
|
||||
if (!n)
|
||||
break;
|
||||
|
||||
+ mutex_unlock(&syslog_lock);
|
||||
if (copy_to_user(buf, text + skip, n)) {
|
||||
if (!len)
|
||||
len = -EFAULT;
|
||||
+ *locked = 0;
|
||||
break;
|
||||
}
|
||||
+ ret = mutex_lock_interruptible(&syslog_lock);
|
||||
|
||||
len += n;
|
||||
size -= n;
|
||||
buf += n;
|
||||
+
|
||||
+ if (ret) {
|
||||
+ if (!len)
|
||||
+ len = ret;
|
||||
+ *locked = 0;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
|
||||
- kfree(text);
|
||||
return len;
|
||||
}
|
||||
|
||||
-static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
+static int count_remaining(struct prb_iterator *iter, u64 until_seq,
|
||||
+ char *msgbuf, int size, bool records, bool time)
|
||||
{
|
||||
- char *text;
|
||||
+ struct prb_iterator local_iter;
|
||||
+ struct printk_log *msg;
|
||||
int len = 0;
|
||||
- u64 next_seq;
|
||||
u64 seq;
|
||||
- u32 idx;
|
||||
+ int ret;
|
||||
+
|
||||
+ prb_iter_copy(&local_iter, iter);
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_next(&local_iter, msgbuf, size, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ /* the iter is invalid, restart from head */
|
||||
+ prb_iter_init(&local_iter, &printk_rb, NULL);
|
||||
+ len = 0;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (until_seq && seq >= until_seq)
|
||||
+ break;
|
||||
+
|
||||
+ if (records) {
|
||||
+ len++;
|
||||
+ } else {
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
+ len += msg_print_text(msg, true, time, NULL, 0);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static void syslog_clear(void)
|
||||
+{
|
||||
+ struct prb_iterator iter;
|
||||
+ int ret;
|
||||
+
|
||||
+ prb_iter_init(&iter, &printk_rb, &clear_seq);
|
||||
+ for (;;) {
|
||||
+ ret = prb_iter_next(&iter, NULL, 0, &clear_seq);
|
||||
+ if (ret == 0)
|
||||
+ break;
|
||||
+ else if (ret < 0)
|
||||
+ prb_iter_init(&iter, &printk_rb, &clear_seq);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static int syslog_print_all(char __user *buf, int size, bool clear)
|
||||
+{
|
||||
+ struct prb_iterator iter;
|
||||
+ struct printk_log *msg;
|
||||
+ char *msgbuf = NULL;
|
||||
+ char *text = NULL;
|
||||
+ int textlen;
|
||||
+ u64 seq = 0;
|
||||
+ int len = 0;
|
||||
bool time;
|
||||
+ int ret;
|
||||
|
||||
- text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
|
||||
+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
|
||||
if (!text)
|
||||
return -ENOMEM;
|
||||
+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
|
||||
+ if (!msgbuf) {
|
||||
+ kfree(text);
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
|
||||
time = printk_time;
|
||||
- logbuf_lock_irq();
|
||||
+
|
||||
/*
|
||||
- * Find first record that fits, including all following records,
|
||||
- * into the user-provided buffer for this dump.
|
||||
+ * Setup iter to last event before clear. Clear may
|
||||
+ * be lost, but keep going with a best effort.
|
||||
*/
|
||||
- seq = clear_seq;
|
||||
- idx = clear_idx;
|
||||
- while (seq < log_next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
-
|
||||
- len += msg_print_text(msg, true, time, NULL, 0);
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
- }
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ prb_iter_seek(&iter, clear_seq);
|
||||
|
||||
- /* move first record forward until length fits into the buffer */
|
||||
- seq = clear_seq;
|
||||
- idx = clear_idx;
|
||||
- while (len > size && seq < log_next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
+ /* count the total bytes after clear */
|
||||
+ len = count_remaining(&iter, 0, msgbuf, PRINTK_RECORD_MAX,
|
||||
+ false, time);
|
||||
+
|
||||
+ /* move iter forward until length fits into the buffer */
|
||||
+ while (len > size) {
|
||||
+ ret = prb_iter_next(&iter, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ /*
|
||||
+ * The iter is now invalid so clear will
|
||||
+ * also be invalid. Restart from the head.
|
||||
+ */
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ len = count_remaining(&iter, 0, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, false, time);
|
||||
+ continue;
|
||||
+ }
|
||||
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
len -= msg_print_text(msg, true, time, NULL, 0);
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
- }
|
||||
|
||||
- /* last message fitting into this dump */
|
||||
- next_seq = log_next_seq;
|
||||
+ if (clear)
|
||||
+ clear_seq = seq;
|
||||
+ }
|
||||
|
||||
+ /* copy messages to buffer */
|
||||
len = 0;
|
||||
- while (len >= 0 && seq < next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
- int textlen = msg_print_text(msg, true, time, text,
|
||||
- LOG_LINE_MAX + PREFIX_MAX);
|
||||
+ while (len >= 0 && len < size) {
|
||||
+ if (clear)
|
||||
+ clear_seq = seq;
|
||||
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
+ ret = prb_iter_next(&iter, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ /*
|
||||
+ * The iter is now invalid. Make a best
|
||||
+ * effort to grab the rest of the log
|
||||
+ * from the new head.
|
||||
+ */
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
+ textlen = msg_print_text(msg, true, time, text,
|
||||
+ PRINTK_SPRINT_MAX);
|
||||
+ if (textlen < 0) {
|
||||
+ len = textlen;
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- logbuf_unlock_irq();
|
||||
if (copy_to_user(buf + len, text, textlen))
|
||||
len = -EFAULT;
|
||||
else
|
||||
len += textlen;
|
||||
- logbuf_lock_irq();
|
||||
-
|
||||
- if (seq < log_first_seq) {
|
||||
- /* messages are gone, move to next one */
|
||||
- seq = log_first_seq;
|
||||
- idx = log_first_idx;
|
||||
- }
|
||||
}
|
||||
|
||||
- if (clear) {
|
||||
- clear_seq = log_next_seq;
|
||||
- clear_idx = log_next_idx;
|
||||
- }
|
||||
- logbuf_unlock_irq();
|
||||
+ if (clear && !seq)
|
||||
+ syslog_clear();
|
||||
|
||||
- kfree(text);
|
||||
+ if (text)
|
||||
+ kfree(text);
|
||||
+ if (msgbuf)
|
||||
+ kfree(msgbuf);
|
||||
return len;
|
||||
}
|
||||
|
||||
-static void syslog_clear(void)
|
||||
-{
|
||||
- logbuf_lock_irq();
|
||||
- clear_seq = log_next_seq;
|
||||
- clear_idx = log_next_idx;
|
||||
- logbuf_unlock_irq();
|
||||
-}
|
||||
-
|
||||
int do_syslog(int type, char __user *buf, int len, int source)
|
||||
{
|
||||
bool clear = false;
|
||||
static int saved_console_loglevel = LOGLEVEL_DEFAULT;
|
||||
+ struct prb_iterator iter;
|
||||
+ char *msgbuf = NULL;
|
||||
+ char *text = NULL;
|
||||
+ int locked;
|
||||
int error;
|
||||
+ int ret;
|
||||
|
||||
error = check_syslog_permissions(type, source);
|
||||
if (error)
|
||||
@@ -1478,11 +1573,49 @@ int do_syslog(int type, char __user *buf
|
||||
return 0;
|
||||
if (!access_ok(buf, len))
|
||||
return -EFAULT;
|
||||
- error = wait_event_interruptible(log_wait,
|
||||
- syslog_seq != log_next_seq);
|
||||
+
|
||||
+ text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
|
||||
+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
|
||||
+ if (!text || !msgbuf) {
|
||||
+ error = -ENOMEM;
|
||||
+ goto out;
|
||||
+ }
|
||||
+
|
||||
+ error = mutex_lock_interruptible(&syslog_lock);
|
||||
if (error)
|
||||
- return error;
|
||||
- error = syslog_print(buf, len);
|
||||
+ goto out;
|
||||
+
|
||||
+ /*
|
||||
+ * Wait until a first message is available. Use a copy
|
||||
+ * because no iteration should occur for syslog now.
|
||||
+ */
|
||||
+ for (;;) {
|
||||
+ prb_iter_copy(&iter, &syslog_iter);
|
||||
+
|
||||
+ mutex_unlock(&syslog_lock);
|
||||
+ ret = prb_iter_wait_next(&iter, NULL, 0, NULL);
|
||||
+ if (ret == -ERESTARTSYS) {
|
||||
+ error = ret;
|
||||
+ goto out;
|
||||
+ }
|
||||
+ error = mutex_lock_interruptible(&syslog_lock);
|
||||
+ if (error)
|
||||
+ goto out;
|
||||
+
|
||||
+ if (ret == -EINVAL) {
|
||||
+ prb_iter_init(&syslog_iter, &printk_rb,
|
||||
+ &syslog_seq);
|
||||
+ syslog_partial = 0;
|
||||
+ continue;
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ /* print as much as will fit in the user buffer */
|
||||
+ locked = 1;
|
||||
+ error = syslog_print(buf, len, text, msgbuf, &locked);
|
||||
+ if (locked)
|
||||
+ mutex_unlock(&syslog_lock);
|
||||
break;
|
||||
/* Read/clear last kernel messages */
|
||||
case SYSLOG_ACTION_READ_CLEAR:
|
||||
@@ -1527,47 +1660,45 @@ int do_syslog(int type, char __user *buf
|
||||
break;
|
||||
/* Number of chars in the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_UNREAD:
|
||||
- logbuf_lock_irq();
|
||||
- if (syslog_seq < log_first_seq) {
|
||||
- /* messages are gone, move to first one */
|
||||
- syslog_seq = log_first_seq;
|
||||
- syslog_idx = log_first_idx;
|
||||
- syslog_partial = 0;
|
||||
- }
|
||||
+ msgbuf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
|
||||
+ if (!msgbuf)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ error = mutex_lock_interruptible(&syslog_lock);
|
||||
+ if (error)
|
||||
+ goto out;
|
||||
+
|
||||
if (source == SYSLOG_FROM_PROC) {
|
||||
/*
|
||||
* Short-cut for poll(/"proc/kmsg") which simply checks
|
||||
* for pending data, not the size; return the count of
|
||||
* records, not the length.
|
||||
*/
|
||||
- error = log_next_seq - syslog_seq;
|
||||
+ error = count_remaining(&syslog_iter, 0, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, true,
|
||||
+ printk_time);
|
||||
} else {
|
||||
- u64 seq = syslog_seq;
|
||||
- u32 idx = syslog_idx;
|
||||
- bool time = syslog_partial ? syslog_time : printk_time;
|
||||
-
|
||||
- while (seq < log_next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
-
|
||||
- error += msg_print_text(msg, true, time, NULL,
|
||||
- 0);
|
||||
- time = printk_time;
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
- }
|
||||
+ error = count_remaining(&syslog_iter, 0, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, false,
|
||||
+ printk_time);
|
||||
error -= syslog_partial;
|
||||
}
|
||||
- logbuf_unlock_irq();
|
||||
+
|
||||
+ mutex_unlock(&syslog_lock);
|
||||
break;
|
||||
/* Size of the log buffer */
|
||||
case SYSLOG_ACTION_SIZE_BUFFER:
|
||||
- error = log_buf_len;
|
||||
+ error = prb_buffer_size(&printk_rb);
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
break;
|
||||
}
|
||||
-
|
||||
+out:
|
||||
+ if (msgbuf)
|
||||
+ kfree(msgbuf);
|
||||
+ if (text)
|
||||
+ kfree(text);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1989,7 +2120,6 @@ EXPORT_SYMBOL(printk);
|
||||
#define printk_time false
|
||||
|
||||
static u64 syslog_seq;
|
||||
-static u32 syslog_idx;
|
||||
static u64 log_first_seq;
|
||||
static u32 log_first_idx;
|
||||
static u64 log_next_seq;
|
@@ -0,0 +1,397 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:30:02 +0100
Subject: [PATCH 24/25] printk: implement kmsg_dump

Since printk messages are now logged to a new ring buffer, update
the kmsg_dump functions to pull the messages from there.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/kmsg_dump.h |   6 -
 kernel/printk/printk.c    | 258 ++++++++++++++++++++++++----------------
 2 files changed, 139 insertions(+), 125 deletions(-)

--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -46,10 +46,8 @@ struct kmsg_dumper {
|
||||
bool registered;
|
||||
|
||||
/* private state of the kmsg iterator */
|
||||
- u32 cur_idx;
|
||||
- u32 next_idx;
|
||||
- u64 cur_seq;
|
||||
- u64 next_seq;
|
||||
+ u64 line_seq;
|
||||
+ u64 buffer_end_seq;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -417,13 +417,13 @@ static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
/* index and sequence number of the first record stored in the buffer */
|
||||
-static u64 log_first_seq;
|
||||
static u32 log_first_idx;
|
||||
|
||||
/* index and sequence number of the next record to store in the buffer */
|
||||
-static u64 log_next_seq;
|
||||
static u32 log_next_idx;
|
||||
|
||||
+static DEFINE_MUTEX(kmsg_dump_lock);
|
||||
+
|
||||
/* the next printk record to read after the last 'clear' command */
|
||||
static u64 clear_seq;
|
||||
static u32 clear_idx;
|
||||
@@ -470,38 +470,6 @@ static char *log_dict(const struct print
|
||||
return (char *)msg + sizeof(struct printk_log) + msg->text_len;
|
||||
}
|
||||
|
||||
-/* get record by index; idx must point to valid msg */
|
||||
-static struct printk_log *log_from_idx(u32 idx)
|
||||
-{
|
||||
- struct printk_log *msg = (struct printk_log *)(log_buf + idx);
|
||||
-
|
||||
- /*
|
||||
- * A length == 0 record is the end of buffer marker. Wrap around and
|
||||
- * read the message at the start of the buffer.
|
||||
- */
|
||||
- if (!msg->len)
|
||||
- return (struct printk_log *)log_buf;
|
||||
- return msg;
|
||||
-}
|
||||
-
|
||||
-/* get next record; idx must point to valid msg */
|
||||
-static u32 log_next(u32 idx)
|
||||
-{
|
||||
- struct printk_log *msg = (struct printk_log *)(log_buf + idx);
|
||||
-
|
||||
- /* length == 0 indicates the end of the buffer; wrap */
|
||||
- /*
|
||||
- * A length == 0 record is the end of buffer marker. Wrap around and
|
||||
- * read the message at the start of the buffer as *this* one, and
|
||||
- * return the one after that.
|
||||
- */
|
||||
- if (!msg->len) {
|
||||
- msg = (struct printk_log *)log_buf;
|
||||
- return msg->len;
|
||||
- }
|
||||
- return idx + msg->len;
|
||||
-}
|
||||
-
|
||||
static void printk_emergency(char *buffer, int level, u64 ts_nsec, u16 cpu,
|
||||
char *text, u16 text_len);
|
||||
|
||||
@@ -2120,9 +2088,7 @@ EXPORT_SYMBOL(printk);
|
||||
#define printk_time false
|
||||
|
||||
static u64 syslog_seq;
|
||||
-static u64 log_first_seq;
|
||||
static u32 log_first_idx;
|
||||
-static u64 log_next_seq;
|
||||
static char *log_text(const struct printk_log *msg) { return NULL; }
|
||||
static char *log_dict(const struct printk_log *msg) { return NULL; }
|
||||
static struct printk_log *log_from_idx(u32 idx) { return NULL; }
|
||||
@@ -3032,7 +2998,6 @@ module_param_named(always_kmsg_dump, alw
|
||||
void kmsg_dump(enum kmsg_dump_reason reason)
|
||||
{
|
||||
struct kmsg_dumper *dumper;
|
||||
- unsigned long flags;
|
||||
|
||||
if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
|
||||
return;
|
||||
@@ -3045,12 +3010,7 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
/* initialize iterator with data about the stored records */
|
||||
dumper->active = true;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
- dumper->cur_seq = clear_seq;
|
||||
- dumper->cur_idx = clear_idx;
|
||||
- dumper->next_seq = log_next_seq;
|
||||
- dumper->next_idx = log_next_idx;
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ kmsg_dump_rewind(dumper);
|
||||
|
||||
/* invoke dumper which will iterate over records */
|
||||
dumper->dump(dumper, reason);
|
||||
@@ -3083,33 +3043,67 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
+ struct prb_iterator iter;
|
||||
struct printk_log *msg;
|
||||
- size_t l = 0;
|
||||
- bool ret = false;
|
||||
+ struct prb_handle h;
|
||||
+ bool cont = false;
|
||||
+ char *msgbuf;
|
||||
+ char *rbuf;
|
||||
+ size_t l;
|
||||
+ u64 seq;
|
||||
+ int ret;
|
||||
|
||||
if (!dumper->active)
|
||||
- goto out;
|
||||
+ return cont;
|
||||
+
|
||||
+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_RECORD_MAX);
|
||||
+ if (!rbuf)
|
||||
+ return cont;
|
||||
+ msgbuf = rbuf;
|
||||
+retry:
|
||||
+ for (;;) {
|
||||
+ prb_iter_init(&iter, &printk_rb, &seq);
|
||||
+
|
||||
+ if (dumper->line_seq == seq) {
|
||||
+ /* already where we want to be */
|
||||
+ break;
|
||||
+ } else if (dumper->line_seq < seq) {
|
||||
+ /* messages are gone, move to first available one */
|
||||
+ dumper->line_seq = seq;
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- if (dumper->cur_seq < log_first_seq) {
|
||||
- /* messages are gone, move to first available one */
|
||||
- dumper->cur_seq = log_first_seq;
|
||||
- dumper->cur_idx = log_first_idx;
|
||||
+ ret = prb_iter_seek(&iter, dumper->line_seq);
|
||||
+ if (ret > 0) {
|
||||
+ /* seeked to line_seq */
|
||||
+ break;
|
||||
+ } else if (ret == 0) {
|
||||
+ /*
|
||||
+ * The end of the list was hit without ever seeing
|
||||
+ * line_seq. Reset it to the beginning of the list.
|
||||
+ */
|
||||
+ prb_iter_init(&iter, &printk_rb, &dumper->line_seq);
|
||||
+ break;
|
||||
+ }
|
||||
+ /* iterator invalid, start over */
|
||||
}
|
||||
|
||||
- /* last entry */
|
||||
- if (dumper->cur_seq >= log_next_seq)
|
||||
+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX,
|
||||
+ &dumper->line_seq);
|
||||
+ if (ret == 0)
|
||||
goto out;
|
||||
+ else if (ret < 0)
|
||||
+ goto retry;
|
||||
|
||||
- msg = log_from_idx(dumper->cur_idx);
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
l = msg_print_text(msg, syslog, printk_time, line, size);
|
||||
|
||||
- dumper->cur_idx = log_next(dumper->cur_idx);
|
||||
- dumper->cur_seq++;
|
||||
- ret = true;
|
||||
-out:
|
||||
if (len)
|
||||
*len = l;
|
||||
- return ret;
|
||||
+ cont = true;
|
||||
+out:
|
||||
+ prb_commit(&h);
|
||||
+ return cont;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3132,12 +3126,11 @@ bool kmsg_dump_get_line_nolock(struct km
|
||||
bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
|
||||
char *line, size_t size, size_t *len)
|
||||
{
|
||||
- unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ mutex_lock(&kmsg_dump_lock);
|
||||
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ mutex_unlock(&kmsg_dump_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -3165,74 +3158,101 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
|
||||
bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
|
||||
char *buf, size_t size, size_t *len)
|
||||
{
|
||||
- unsigned long flags;
|
||||
- u64 seq;
|
||||
- u32 idx;
|
||||
- u64 next_seq;
|
||||
- u32 next_idx;
|
||||
- size_t l = 0;
|
||||
- bool ret = false;
|
||||
+ struct prb_iterator iter;
|
||||
bool time = printk_time;
|
||||
+ struct printk_log *msg;
|
||||
+ u64 new_end_seq = 0;
|
||||
+ struct prb_handle h;
|
||||
+ bool cont = false;
|
||||
+ char *msgbuf;
|
||||
+ u64 end_seq;
|
||||
+ int textlen;
|
||||
+ u64 seq = 0;
|
||||
+ char *rbuf;
|
||||
+ int l = 0;
|
||||
+ int ret;
|
||||
|
||||
if (!dumper->active)
|
||||
- goto out;
|
||||
+ return cont;
|
||||
|
||||
- logbuf_lock_irqsave(flags);
|
||||
- if (dumper->cur_seq < log_first_seq) {
|
||||
- /* messages are gone, move to first available one */
|
||||
- dumper->cur_seq = log_first_seq;
|
||||
- dumper->cur_idx = log_first_idx;
|
||||
- }
|
||||
+ rbuf = prb_reserve(&h, &sprint_rb, PRINTK_RECORD_MAX);
|
||||
+ if (!rbuf)
|
||||
+ return cont;
|
||||
+ msgbuf = rbuf;
|
||||
|
||||
- /* last entry */
|
||||
- if (dumper->cur_seq >= dumper->next_seq) {
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
- goto out;
|
||||
- }
|
||||
-
|
||||
- /* calculate length of entire buffer */
|
||||
- seq = dumper->cur_seq;
|
||||
- idx = dumper->cur_idx;
|
||||
- while (seq < dumper->next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
|
||||
- l += msg_print_text(msg, true, time, NULL, 0);
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
+ /*
|
||||
+ * seek to the start record, which is set/modified
|
||||
+ * by kmsg_dump_get_line_nolock()
|
||||
+ */
|
||||
+ ret = prb_iter_seek(&iter, dumper->line_seq);
|
||||
+ if (ret <= 0)
|
||||
+ prb_iter_init(&iter, &printk_rb, &seq);
|
||||
+
|
||||
+ /* work with a local end seq to have a constant value */
|
||||
+ end_seq = dumper->buffer_end_seq;
|
||||
+ if (!end_seq) {
|
||||
+ /* initialize end seq to "infinity" */
|
||||
+ end_seq = -1;
|
||||
+ dumper->buffer_end_seq = end_seq;
|
||||
}
|
||||
+retry:
|
||||
+ if (seq >= end_seq)
|
||||
+ goto out;
|
||||
|
||||
- /* move first record forward until length fits into the buffer */
|
||||
- seq = dumper->cur_seq;
|
||||
- idx = dumper->cur_idx;
|
||||
- while (l >= size && seq < dumper->next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
+ /* count the total bytes after seq */
|
||||
+ textlen = count_remaining(&iter, end_seq, msgbuf,
|
||||
+ PRINTK_RECORD_MAX, 0, time);
|
||||
|
||||
- l -= msg_print_text(msg, true, time, NULL, 0);
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
+ /* move iter forward until length fits into the buffer */
|
||||
+ while (textlen > size) {
|
||||
+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ prb_iter_init(&iter, &printk_rb, &seq);
|
||||
+ goto retry;
|
||||
+ }
|
||||
+
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
+ textlen -= msg_print_text(msg, true, time, NULL, 0);
|
||||
}
|
||||
|
||||
- /* last message in next interation */
|
||||
- next_seq = seq;
|
||||
- next_idx = idx;
|
||||
+ /* save end seq for the next interation */
|
||||
+ new_end_seq = seq + 1;
|
||||
|
||||
- l = 0;
|
||||
- while (seq < dumper->next_seq) {
|
||||
- struct printk_log *msg = log_from_idx(idx);
|
||||
+ /* copy messages to buffer */
|
||||
+ while (l < size) {
|
||||
+ ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq);
|
||||
+ if (ret == 0) {
|
||||
+ break;
|
||||
+ } else if (ret < 0) {
|
||||
+ /*
|
||||
+ * iterator (and thus also the start position)
|
||||
+ * invalid, start over from beginning of list
|
||||
+ */
|
||||
+ prb_iter_init(&iter, &printk_rb, NULL);
|
||||
+ continue;
|
||||
+ }
|
||||
|
||||
- l += msg_print_text(msg, syslog, time, buf + l, size - l);
|
||||
- idx = log_next(idx);
|
||||
- seq++;
|
||||
+ if (seq >= end_seq)
|
||||
+ break;
|
||||
+
|
||||
+ msg = (struct printk_log *)msgbuf;
|
||||
+ textlen = msg_print_text(msg, syslog, time, buf + l, size - l);
|
||||
+ if (textlen > 0)
|
||||
+ l += textlen;
|
||||
+ cont = true;
|
||||
}
|
||||
|
||||
- dumper->next_seq = next_seq;
|
||||
- dumper->next_idx = next_idx;
|
||||
- ret = true;
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
-out:
|
||||
- if (len)
|
||||
+ if (cont && len)
|
||||
*len = l;
|
||||
- return ret;
|
||||
+out:
|
||||
+ prb_commit(&h);
|
||||
+ if (new_end_seq)
|
||||
+ dumper->buffer_end_seq = new_end_seq;
|
||||
+ return cont;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
|
||||
@@ -3248,10 +3268,8 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
|
||||
*/
|
||||
void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper)
|
||||
{
|
||||
- dumper->cur_seq = clear_seq;
|
||||
- dumper->cur_idx = clear_idx;
|
||||
- dumper->next_seq = log_next_seq;
|
||||
- dumper->next_idx = log_next_idx;
|
||||
+ dumper->line_seq = 0;
|
||||
+ dumper->buffer_end_seq = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3264,11 +3282,9 @@ void kmsg_dump_rewind_nolock(struct kmsg
|
||||
*/
|
||||
void kmsg_dump_rewind(struct kmsg_dumper *dumper)
|
||||
{
|
||||
- unsigned long flags;
|
||||
-
|
||||
- logbuf_lock_irqsave(flags);
|
||||
+ mutex_lock(&kmsg_dump_lock);
|
||||
kmsg_dump_rewind_nolock(dumper);
|
||||
- logbuf_unlock_irqrestore(flags);
|
||||
+ mutex_unlock(&kmsg_dump_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
|
||||
|
@@ -0,0 +1,346 @@
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 12 Feb 2019 15:30:03 +0100
Subject: [PATCH 25/25] printk: remove unused code

Code relating to the safe context and anything dealing with the
previous log buffer implementation is no longer in use. Remove it.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/printk/internal.h |  41 -----------
 kernel/printk/printk.c   | 161 ++++-------------------------------------------
 lib/bust_spinlocks.c     |   3
 3 files changed, 16 insertions(+), 189 deletions(-)
 delete mode 100644 kernel/printk/internal.h

--- a/kernel/printk/internal.h
+++ /dev/null
@@ -1,41 +0,0 @@
|
||||
-/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
-/*
|
||||
- * internal.h - printk internal definitions
|
||||
- */
|
||||
-#include <linux/percpu.h>
|
||||
-
|
||||
-#ifdef CONFIG_PRINTK
|
||||
-
|
||||
-#define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff
|
||||
-#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000
|
||||
-#define PRINTK_NMI_CONTEXT_MASK 0x80000000
|
||||
-
|
||||
-extern raw_spinlock_t logbuf_lock;
|
||||
-
|
||||
-__printf(5, 0)
|
||||
-int vprintk_store(int facility, int level,
|
||||
- const char *dict, size_t dictlen,
|
||||
- const char *fmt, va_list args);
|
||||
-
|
||||
-__printf(1, 0) int vprintk_default(const char *fmt, va_list args);
|
||||
-__printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args);
|
||||
-
|
||||
-void defer_console_output(void);
|
||||
-
|
||||
-#else
|
||||
-
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
|
||||
-
|
||||
-/*
|
||||
- * In !PRINTK builds we still export logbuf_lock spin_lock, console_sem
|
||||
- * semaphore and some of console functions (console_unlock()/etc.), so
|
||||
- * printk-safe must preserve the existing local IRQ guarantees.
|
||||
- */
|
||||
-#endif /* CONFIG_PRINTK */
|
||||
-
|
||||
-#define printk_safe_enter_irqsave(flags) local_irq_save(flags)
|
||||
-#define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
|
||||
-
|
||||
-#define printk_safe_enter_irq() local_irq_disable()
|
||||
-#define printk_safe_exit_irq() local_irq_enable()
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -61,7 +61,6 @@
|
||||
|
||||
#include "console_cmdline.h"
|
||||
#include "braille.h"
|
||||
-#include "internal.h"
|
||||
|
||||
int console_printk[5] = {
|
||||
CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */
|
||||
@@ -366,41 +365,6 @@ struct printk_log {
|
||||
#endif
|
||||
;
|
||||
|
||||
-/*
|
||||
- * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken
|
||||
- * within the scheduler's rq lock. It must be released before calling
|
||||
- * console_unlock() or anything else that might wake up a process.
|
||||
- */
|
||||
-DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
-
|
||||
-/*
|
||||
- * Helper macros to lock/unlock logbuf_lock and switch between
|
||||
- * printk-safe/unsafe modes.
|
||||
- */
|
||||
-#define logbuf_lock_irq() \
|
||||
- do { \
|
||||
- printk_safe_enter_irq(); \
|
||||
- raw_spin_lock(&logbuf_lock); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_unlock_irq() \
|
||||
- do { \
|
||||
- raw_spin_unlock(&logbuf_lock); \
|
||||
- printk_safe_exit_irq(); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_lock_irqsave(flags) \
|
||||
- do { \
|
||||
- printk_safe_enter_irqsave(flags); \
|
||||
- raw_spin_lock(&logbuf_lock); \
|
||||
- } while (0)
|
||||
-
|
||||
-#define logbuf_unlock_irqrestore(flags) \
|
||||
- do { \
|
||||
- raw_spin_unlock(&logbuf_lock); \
|
||||
- printk_safe_exit_irqrestore(flags); \
|
||||
- } while (0)
|
||||
-
|
||||
DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
@@ -410,23 +374,15 @@ DECLARE_STATIC_PRINTKRB(printk_rb, CONFI
|
||||
static DEFINE_MUTEX(syslog_lock);
|
||||
DECLARE_STATIC_PRINTKRB_ITER(syslog_iter, &printk_rb);
|
||||
|
||||
-DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
-/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
+/* the last printk record to read by syslog(READ) or /proc/kmsg */
|
||||
static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
-/* index and sequence number of the first record stored in the buffer */
|
||||
-static u32 log_first_idx;
|
||||
-
|
||||
-/* index and sequence number of the next record to store in the buffer */
|
||||
-static u32 log_next_idx;
|
||||
-
|
||||
static DEFINE_MUTEX(kmsg_dump_lock);
|
||||
|
||||
/* the next printk record to read after the last 'clear' command */
|
||||
static u64 clear_seq;
|
||||
-static u32 clear_idx;
|
||||
|
||||
#ifdef CONFIG_PRINTK_CALLER
|
||||
#define PREFIX_MAX 48
|
||||
@@ -438,24 +394,16 @@ static u32 clear_idx;
|
||||
#define LOG_LEVEL(v) ((v) & 0x07)
|
||||
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)
|
||||
|
||||
-/* record buffer */
|
||||
-#define LOG_ALIGN __alignof__(struct printk_log)
|
||||
-#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
|
||||
-#define LOG_BUF_LEN_MAX (u32)(1 << 31)
|
||||
-static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
|
||||
-static char *log_buf = __log_buf;
|
||||
-static u32 log_buf_len = __LOG_BUF_LEN;
|
||||
-
|
||||
/* Return log buffer address */
|
||||
char *log_buf_addr_get(void)
|
||||
{
|
||||
- return log_buf;
|
||||
+ return printk_rb.buffer;
|
||||
}
|
||||
|
||||
/* Return log buffer size */
|
||||
u32 log_buf_len_get(void)
|
||||
{
|
||||
- return log_buf_len;
|
||||
+ return (1 << printk_rb.size_bits);
|
||||
}
|
||||
|
||||
/* human readable text of the record */
|
||||
@@ -980,11 +928,6 @@ const struct file_operations kmsg_fops =
|
||||
*/
|
||||
void log_buf_vmcoreinfo_setup(void)
|
||||
{
|
||||
- VMCOREINFO_SYMBOL(log_buf);
|
||||
- VMCOREINFO_SYMBOL(log_buf_len);
|
||||
- VMCOREINFO_SYMBOL(log_first_idx);
|
||||
- VMCOREINFO_SYMBOL(clear_idx);
|
||||
- VMCOREINFO_SYMBOL(log_next_idx);
|
||||
/*
|
||||
* Export struct printk_log size and field offsets. User space tools can
|
||||
* parse it and detect any changes to structure down the line.
|
||||
@@ -1000,6 +943,8 @@ void log_buf_vmcoreinfo_setup(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
+/* FIXME: no support for buffer resizing */
|
||||
+#if 0
|
||||
/* requested log_buf_len from kernel cmdline */
|
||||
static unsigned long __initdata new_log_buf_len;
|
||||
|
||||
@@ -1065,9 +1010,12 @@ static void __init log_buf_add_cpu(void)
|
||||
#else /* !CONFIG_SMP */
|
||||
static inline void log_buf_add_cpu(void) {}
|
||||
#endif /* CONFIG_SMP */
|
||||
+#endif /* 0 */
|
||||
|
||||
void __init setup_log_buf(int early)
|
||||
{
|
||||
+/* FIXME: no support for buffer resizing */
|
||||
+#if 0
|
||||
unsigned long flags;
|
||||
char *new_log_buf;
|
||||
unsigned int free;
|
||||
@@ -1099,6 +1047,7 @@ void __init setup_log_buf(int early)
|
||||
pr_info("log_buf_len: %u bytes\n", log_buf_len);
|
||||
pr_info("early log buf free: %u(%u%%)\n",
|
||||
free, (free * 100) / __LOG_BUF_LEN);
|
||||
+#endif
|
||||
}
|
||||
|
||||
static bool __read_mostly ignore_loglevel;
|
||||
@@ -2019,7 +1968,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
}
|
||||
EXPORT_SYMBOL(vprintk_emit);
|
||||
|
||||
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
|
||||
+static __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
|
||||
{
|
||||
return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
|
||||
}
|
||||
@@ -2080,31 +2029,6 @@ asmlinkage __visible int printk(const ch
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL(printk);
|
||||
-
|
||||
-#else /* CONFIG_PRINTK */
|
||||
-
|
||||
-#define LOG_LINE_MAX 0
|
||||
-#define PREFIX_MAX 0
|
||||
-#define printk_time false
|
||||
-
|
||||
-static u64 syslog_seq;
|
||||
-static u32 log_first_idx;
|
||||
-static char *log_text(const struct printk_log *msg) { return NULL; }
|
||||
-static char *log_dict(const struct printk_log *msg) { return NULL; }
|
||||
-static struct printk_log *log_from_idx(u32 idx) { return NULL; }
|
||||
-static u32 log_next(u32 idx) { return 0; }
|
||||
-static ssize_t msg_print_ext_header(char *buf, size_t size,
|
||||
- struct printk_log *msg,
|
||||
- u64 seq) { return 0; }
|
||||
-static ssize_t msg_print_ext_body(char *buf, size_t size,
|
||||
- char *dict, size_t dict_len,
|
||||
- char *text, size_t text_len) { return 0; }
|
||||
-static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len, int level) {}
|
||||
-static size_t msg_print_text(const struct printk_log *msg, bool syslog,
|
||||
- bool time, char *buf, size_t size) { return 0; }
|
||||
-static bool suppress_message_printing(int level) { return false; }
|
||||
-
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
#ifdef CONFIG_EARLY_PRINTK
|
||||
@@ -2401,15 +2325,10 @@ void console_unblank(void)
|
||||
void console_flush_on_panic(enum con_flush_mode mode)
|
||||
{
|
||||
/*
|
||||
- * If someone else is holding the console lock, trylock will fail
|
||||
- * and may_schedule may be set. Ignore and proceed to unlock so
|
||||
- * that messages are flushed out. As this can be called from any
|
||||
- * context and we don't want to get preempted while flushing,
|
||||
- * ensure may_schedule is cleared.
|
||||
+ * FIXME: This is currently a NOP. Emergency messages will have been
|
||||
+ * printed, but what about if write_atomic is not available on the
|
||||
+ * console? What if the printk kthread is still alive?
|
||||
*/
|
||||
- console_trylock();
|
||||
- console_may_schedule = 0;
|
||||
- console_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2758,43 +2677,6 @@ static int __init printk_late_init(void)
|
||||
late_initcall(printk_late_init);
|
||||
|
||||
#if defined CONFIG_PRINTK
|
||||
-/*
|
||||
- * Delayed printk version, for scheduler-internal messages:
|
||||
- */
|
||||
-#define PRINTK_PENDING_WAKEUP 0x01
|
||||
-#define PRINTK_PENDING_OUTPUT 0x02
|
||||
-
|
||||
-static DEFINE_PER_CPU(int, printk_pending);
|
||||
-
|
||||
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
|
||||
-{
|
||||
- int pending = __this_cpu_xchg(printk_pending, 0);
|
||||
-
|
||||
- if (pending & PRINTK_PENDING_OUTPUT) {
|
||||
- /* If trylock fails, someone else is doing the printing */
|
||||
- if (console_trylock())
|
||||
- console_unlock();
|
||||
- }
|
||||
-
|
||||
- if (pending & PRINTK_PENDING_WAKEUP)
|
||||
- wake_up_interruptible(&log_wait);
|
||||
-}
|
||||
-
|
||||
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
|
||||
- .func = wake_up_klogd_work_func,
|
||||
- .flags = IRQ_WORK_LAZY,
|
||||
-};
|
||||
-
|
||||
-void wake_up_klogd(void)
|
||||
-{
|
||||
- preempt_disable();
|
||||
- if (waitqueue_active(&log_wait)) {
|
||||
- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
|
||||
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
|
||||
- }
|
||||
- preempt_enable();
|
||||
-}
|
||||
-
|
||||
static int printk_kthread_func(void *data)
|
||||
{
|
||||
struct prb_iterator iter;
|
||||
@@ -2860,22 +2742,9 @@ static int __init init_printk_kthread(vo
|
||||
}
|
||||
late_initcall(init_printk_kthread);
|
||||
|
||||
-void defer_console_output(void)
|
||||
-{
|
||||
- preempt_disable();
|
||||
- __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
|
||||
- irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
|
||||
- preempt_enable();
|
||||
-}
|
||||
-
|
||||
-int vprintk_deferred(const char *fmt, va_list args)
|
||||
+static int vprintk_deferred(const char *fmt, va_list args)
|
||||
{
|
||||
- int r;
|
||||
-
|
||||
- r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
|
||||
- defer_console_output();
|
||||
-
|
||||
- return r;
|
||||
+ return vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
|
||||
}
|
||||
|
||||
int printk_deferred(const char *fmt, ...)
|
||||
--- a/lib/bust_spinlocks.c
|
||||
+++ b/lib/bust_spinlocks.c
|
||||
@@ -26,7 +26,6 @@ void bust_spinlocks(int yes)
|
||||
unblank_screen();
|
||||
#endif
|
||||
console_unblank();
|
||||
- if (--oops_in_progress == 0)
|
||||
- wake_up_klogd();
|
||||
+ --oops_in_progress;
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 14 Feb 2019 23:13:30 +0100
Subject: [PATCH] printk: set deferred to default loglevel, enforce mask

All messages printed via vprintk_deferred() were being
automatically treated as emergency messages.

Messages printed via vprintk_deferred() should be set to the
default loglevel. LOGLEVEL_SCHED is no longer relevant.

Also, enforce the loglevel mask for emergency messages.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
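For reference, a minimal sketch of the syslog prio encoding that the "level & 7" mask in this patch relies on, matching the LOG_LEVEL()/LOG_FACILITY() helpers seen earlier in this series. The small program below is illustrative only and is not part of the patch: the low three bits of a record's prio carry the loglevel and the higher bits the facility, so masking with 7 recovers the level.

#include <stdio.h>

#define LOG_LEVEL(v)    ((v) & 0x07)
#define LOG_FACILITY(v) ((v) >> 3 & 0xff)

int main(void)
{
    /* prio 165 = facility 20 (local4) << 3 | level 5 (notice) */
    int prio = 165;

    printf("facility=%d level=%d\n", LOG_FACILITY(prio), LOG_LEVEL(prio));
    return 0;
}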
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1951,7 +1951,7 @@ asmlinkage int vprintk_emit(int facility
|
||||
* - text points to beginning of text
|
||||
* - there is room before text for prefix
|
||||
*/
|
||||
- printk_emergency(rbuf, level, ts_nsec, cpu, text, text_len);
|
||||
+ printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len);
|
||||
|
||||
if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) {
|
||||
cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len);
|
||||
@@ -2744,7 +2744,7 @@ late_initcall(init_printk_kthread);
|
||||
|
||||
static int vprintk_deferred(const char *fmt, va_list args)
|
||||
{
|
||||
- return vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
|
||||
+ return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
|
||||
}
|
||||
|
||||
int printk_deferred(const char *fmt, ...)
|
@ -0,0 +1,43 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 14 Feb 2019 17:38:24 +0100
Subject: [PATCH] serial: 8250: remove that trylock in
serial8250_console_write_atomic()

This does not work, as the spinlock is an rtmutex on RT and cannot be
taken in NMI context. As per John, it is not needed.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250_port.c | 11 -----------
1 file changed, 11 deletions(-)
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250_port.c
|
||||
+++ b/drivers/tty/serial/8250/8250_port.c
|
||||
@@ -3191,17 +3191,9 @@ void serial8250_console_write_atomic(str
|
||||
{
|
||||
struct uart_port *port = &up->port;
|
||||
unsigned int flags;
|
||||
- bool locked;
|
||||
|
||||
console_atomic_lock(&flags);
|
||||
|
||||
- /*
|
||||
- * If possible, keep any other CPUs from working with the
|
||||
- * UART until the atomic message is completed. This helps
|
||||
- * to keep the output more orderly.
|
||||
- */
|
||||
- locked = spin_trylock(&port->lock);
|
||||
-
|
||||
touch_nmi_watchdog();
|
||||
|
||||
clear_ier(up);
|
||||
@@ -3216,9 +3208,6 @@ void serial8250_console_write_atomic(str
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
restore_ier(up);
|
||||
|
||||
- if (locked)
|
||||
- spin_unlock(&port->lock);
|
||||
-
|
||||
console_atomic_unlock(flags);
|
||||
}
|
||||
|
@ -0,0 +1,38 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 16 Feb 2019 09:02:00 +0100
Subject: [PATCH] serial: 8250: export symbols which are used by symbols

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250_port.c | 2 ++
kernel/printk/printk.c | 1 +
2 files changed, 3 insertions(+)
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250_port.c
|
||||
+++ b/drivers/tty/serial/8250/8250_port.c
|
||||
@@ -2025,6 +2025,7 @@ void clear_ier(struct uart_8250_port *up
|
||||
}
|
||||
console_atomic_unlock(flags);
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(clear_ier);
|
||||
|
||||
void restore_ier(struct uart_8250_port *up)
|
||||
{
|
||||
@@ -2036,6 +2037,7 @@ void restore_ier(struct uart_8250_port *
|
||||
serial_port_out(port, UART_IER, atomic_read(&ier_value));
|
||||
console_atomic_unlock(flags);
|
||||
}
|
||||
+EXPORT_SYMBOL_GPL(restore_ier);
|
||||
|
||||
#ifdef CONFIG_CONSOLE_POLL
|
||||
/*
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -2257,6 +2257,7 @@ int is_console_locked(void)
|
||||
{
|
||||
return console_locked;
|
||||
}
|
||||
+EXPORT_SYMBOL(is_console_locked);
|
||||
|
||||
/**
|
||||
* console_unlock - unlock the console system
|
25
kernel/patches-5.4.x-rt/0046-arm-remove-printk_nmi_.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Feb 2019 14:34:20 +0100
Subject: [PATCH] arm: remove printk_nmi_.*()

It is no longer provided by the printk core code.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kernel/smp.c | 2 --
1 file changed, 2 deletions(-)
|
||||
|
||||
--- a/arch/arm/kernel/smp.c
|
||||
+++ b/arch/arm/kernel/smp.c
|
||||
@@ -682,11 +682,9 @@ void handle_IPI(int ipinr, struct pt_reg
|
||||
break;
|
||||
|
||||
case IPI_CPU_BACKTRACE:
|
||||
- printk_nmi_enter();
|
||||
irq_enter();
|
||||
nmi_cpu_backtrace(regs);
|
||||
irq_exit();
|
||||
- printk_nmi_exit();
|
||||
break;
|
||||
|
||||
default:
|
@ -0,0 +1,67 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Sun, 17 Feb 2019 03:11:20 +0100
Subject: [PATCH] printk: only allow kernel to emergency message

Emergency messages exist as a mechanism for the kernel to
communicate critical information to users. It is not meant for
use by userspace. Only allow facility=0 messages to be
processed by the emergency message code.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1754,7 +1754,8 @@ static void printk_write_history(struct
|
||||
* The console_lock must be held.
|
||||
*/
|
||||
static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
|
||||
- const char *text, size_t len, int level)
|
||||
+ const char *text, size_t len, int level,
|
||||
+ int facility)
|
||||
{
|
||||
struct console *con;
|
||||
|
||||
@@ -1774,13 +1775,14 @@ static void call_console_drivers(u64 seq
|
||||
con->wrote_history = 1;
|
||||
con->printk_seq = seq - 1;
|
||||
}
|
||||
- if (con->write_atomic && level < emergency_console_loglevel) {
|
||||
+ if (con->write_atomic && level < emergency_console_loglevel &&
|
||||
+ facility == 0) {
|
||||
/* skip emergency messages, already printed */
|
||||
if (con->printk_seq < seq)
|
||||
con->printk_seq = seq;
|
||||
continue;
|
||||
}
|
||||
- if (con->flags & CON_BOOT) {
|
||||
+ if (con->flags & CON_BOOT && facility == 0) {
|
||||
/* skip emergency messages, already printed */
|
||||
if (con->printk_seq < seq)
|
||||
con->printk_seq = seq;
|
||||
@@ -1951,7 +1953,10 @@ asmlinkage int vprintk_emit(int facility
|
||||
* - text points to beginning of text
|
||||
* - there is room before text for prefix
|
||||
*/
|
||||
- printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len);
|
||||
+ if (facility == 0) {
|
||||
+ /* only the kernel can create emergency messages */
|
||||
+ printk_emergency(rbuf, level & 7, ts_nsec, cpu, text, text_len);
|
||||
+ }
|
||||
|
||||
if ((lflags & LOG_CONT) || !(lflags & LOG_NEWLINE)) {
|
||||
cont_add(ctx, cpu, caller_id, facility, level, lflags, text, text_len);
|
||||
@@ -2715,8 +2720,8 @@ static int printk_kthread_func(void *dat
|
||||
&len, printk_time);
|
||||
|
||||
console_lock();
|
||||
- call_console_drivers(master_seq, ext_text,
|
||||
- ext_len, text, len, msg->level);
|
||||
+ call_console_drivers(master_seq, ext_text, ext_len, text, len,
|
||||
+ msg->level, msg->facility);
|
||||
if (len > 0 || ext_len > 0)
|
||||
printk_delay(msg->level);
|
||||
console_unlock();
|
@ -0,0 +1,45 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Fri, 22 Feb 2019 23:02:44 +0100
|
||||
Subject: [PATCH] printk: devkmsg: llseek: reset clear if it is lost
|
||||
|
||||
SEEK_DATA will seek to the last clear record. If this clear record
|
||||
is no longer in the ring buffer, devkmsg_llseek() will go into an
|
||||
infinite loop. Fix that by resetting the clear sequence if the old
|
||||
clear record is no longer in the ring buffer.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/printk/printk.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -761,6 +761,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
{
|
||||
struct devkmsg_user *user = file->private_data;
|
||||
loff_t ret;
|
||||
+ u64 seq;
|
||||
|
||||
if (!user)
|
||||
return -EBADF;
|
||||
@@ -783,7 +784,7 @@ static loff_t devkmsg_llseek(struct file
|
||||
* changes no global state, and does not clear anything.
|
||||
*/
|
||||
for (;;) {
|
||||
- prb_iter_init(&user->iter, &printk_rb, NULL);
|
||||
+ prb_iter_init(&user->iter, &printk_rb, &seq);
|
||||
ret = prb_iter_seek(&user->iter, clear_seq);
|
||||
if (ret > 0) {
|
||||
/* seeked to clear seq */
|
||||
@@ -800,6 +801,10 @@ static loff_t devkmsg_llseek(struct file
|
||||
break;
|
||||
}
|
||||
/* iterator invalid, start over */
|
||||
+
|
||||
+ /* reset clear_seq if it is no longer available */
|
||||
+ if (seq > clear_seq)
|
||||
+ clear_seq = 0;
|
||||
}
|
||||
ret = 0;
|
||||
break;
|
@ -0,0 +1,24 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 22 Feb 2019 12:47:13 +0100
Subject: [PATCH] printk: print "rate-limitted" message as info

If messages which are injected via kmsg are dropped then they don't need
to be printed as warnings. This is to avoid latency spikes if the
interface decides to print a lot of important messages.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/ratelimit.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/ratelimit.h
|
||||
+++ b/include/linux/ratelimit.h
|
||||
@@ -59,7 +59,7 @@ static inline void ratelimit_state_exit(
|
||||
return;
|
||||
|
||||
if (rs->missed) {
|
||||
- pr_warn("%s: %d output lines suppressed due to ratelimiting\n",
|
||||
+ pr_info("%s: %d output lines suppressed due to ratelimiting\n",
|
||||
current->comm, rs->missed);
|
||||
rs->missed = 0;
|
||||
}
|
@ -0,0 +1,84 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 24 Apr 2019 16:36:04 +0200
Subject: [PATCH] printk: kmsg_dump: remove mutex usage

The kmsg dumper can be called from any context, but the dumping
helpers were using a mutex to synchronize the iterator against
concurrent dumps.

Rather than trying to synchronize the iterator, use a local copy
of the iterator during the dump. Then no synchronization is
required.

Reported-by: Scott Wood <swood@redhat.com>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 23 ++++++++++-------------
1 file changed, 10 insertions(+), 13 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -379,8 +379,6 @@ static u64 syslog_seq;
|
||||
static size_t syslog_partial;
|
||||
static bool syslog_time;
|
||||
|
||||
-static DEFINE_MUTEX(kmsg_dump_lock);
|
||||
-
|
||||
/* the next printk record to read after the last 'clear' command */
|
||||
static u64 clear_seq;
|
||||
|
||||
@@ -2877,6 +2875,7 @@ module_param_named(always_kmsg_dump, alw
|
||||
*/
|
||||
void kmsg_dump(enum kmsg_dump_reason reason)
|
||||
{
|
||||
+ struct kmsg_dumper dumper_local;
|
||||
struct kmsg_dumper *dumper;
|
||||
|
||||
if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
|
||||
@@ -2887,16 +2886,18 @@ void kmsg_dump(enum kmsg_dump_reason rea
|
||||
if (dumper->max_reason && reason > dumper->max_reason)
|
||||
continue;
|
||||
|
||||
- /* initialize iterator with data about the stored records */
|
||||
- dumper->active = true;
|
||||
+ /*
|
||||
+ * use a local copy to avoid modifying the
|
||||
+ * iterator used by any other cpus/contexts
|
||||
+ */
|
||||
+ memcpy(&dumper_local, dumper, sizeof(dumper_local));
|
||||
|
||||
- kmsg_dump_rewind(dumper);
|
||||
+ /* initialize iterator with data about the stored records */
|
||||
+ dumper_local.active = true;
|
||||
+ kmsg_dump_rewind(&dumper_local);
|
||||
|
||||
/* invoke dumper which will iterate over records */
|
||||
- dumper->dump(dumper, reason);
|
||||
-
|
||||
- /* reset iterator */
|
||||
- dumper->active = false;
|
||||
+ dumper_local.dump(&dumper_local, reason);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
@@ -3008,9 +3009,7 @@ bool kmsg_dump_get_line(struct kmsg_dump
|
||||
{
|
||||
bool ret;
|
||||
|
||||
- mutex_lock(&kmsg_dump_lock);
|
||||
ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
|
||||
- mutex_unlock(&kmsg_dump_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -3162,9 +3161,7 @@ void kmsg_dump_rewind_nolock(struct kmsg
|
||||
*/
|
||||
void kmsg_dump_rewind(struct kmsg_dumper *dumper)
|
||||
{
|
||||
- mutex_lock(&kmsg_dump_lock);
|
||||
kmsg_dump_rewind_nolock(dumper);
|
||||
- mutex_unlock(&kmsg_dump_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
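A minimal user-space sketch of the local-copy approach this patch applies (illustrative only; the struct and helper names below are invented for the example and are not the kernel kmsg_dump API): each dump works on a stack-local copy of the iterator state, so the shared object never needs a lock.

#include <stdio.h>
#include <string.h>

/* Illustrative stand-in for a dumper with embedded iterator state. */
struct dumper {
    const char **records;   /* backing store of records */
    int count;              /* number of records */
    int pos;                /* iterator position (shared state) */
};

/*
 * Dump from a stack-local copy of the dumper. The shared object is
 * only read once (memcpy); the iteration never writes back to it,
 * so concurrent dumps in other contexts need no synchronization.
 */
static void dump_records(const struct dumper *shared)
{
    struct dumper local;

    memcpy(&local, shared, sizeof(local));
    local.pos = 0;  /* rewind the local copy only */

    while (local.pos < local.count)
        printf("%s\n", local.records[local.pos++]);
}

int main(void)
{
    const char *recs[] = { "record 0", "record 1", "record 2" };
    struct dumper d = { .records = recs, .count = 3, .pos = 0 };

    dump_records(&d);
    return 0;
}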
|
||||
|
@ -0,0 +1,43 @@
|
||||
From: He Zhe <zhe.he@windriver.com>
Date: Tue, 24 Sep 2019 15:26:39 +0800
Subject: [PATCH] printk: devkmsg: read: Return EPIPE when the first
message user-space wants has gone

When user-space wants to read the first message, that is when user->seq
is 0, and that message has gone, it currently automatically resets
user->seq to the current first seq. This mis-aligns with the mainline
kernel.

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/ABI/testing/dev-kmsg#n39
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/printk/printk.c#n899

We should inform user-space that what it wants has gone by returning
EPIPE in such a scenario.

Link: https://lore.kernel.org/r/20190924072639.25986-1-zhe.he@windriver.com
Signed-off-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -713,14 +713,10 @@ static ssize_t devkmsg_read(struct file
|
||||
goto out;
|
||||
}
|
||||
|
||||
- if (user->seq == 0) {
|
||||
- user->seq = seq;
|
||||
- } else {
|
||||
- user->seq++;
|
||||
- if (user->seq < seq) {
|
||||
- ret = -EPIPE;
|
||||
- goto restore_out;
|
||||
- }
|
||||
+ user->seq++;
|
||||
+ if (user->seq < seq) {
|
||||
+ ret = -EPIPE;
|
||||
+ goto restore_out;
|
||||
}
|
||||
|
||||
msg = (struct printk_log *)&user->msgbuf[0];
|
@ -0,0 +1,43 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Mon, 7 Oct 2019 16:20:39 +0200
Subject: [PATCH] printk: handle iterating while buffer changing

The syslog and kmsg_dump readers are provided buffers to fill.
Both try to maximize the provided buffer usage by calculating the
maximum number of messages that can fit. However, if after the
calculation, messages are dropped and new messages added, the
calculation will no longer match.

For syslog, add a check to make sure the provided buffer is not
overfilled.

For kmsg_dump, start over by recalculating the messages
available.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
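A small illustrative sketch of the syslog-side guard (not the kernel code; the names are invented): when the record set can change between the size calculation and the copy, re-check before each copy so the caller's buffer is never overfilled.

#include <stdio.h>
#include <string.h>

/* Copy as many records as fit into buf; stop instead of overfilling. */
static size_t fill_buffer(char *buf, size_t size,
                          const char **records, int count)
{
    size_t len = 0;
    int i;

    for (i = 0; i < count; i++) {
        size_t textlen = strlen(records[i]);

        /* the record set may have changed; never overfill */
        if (len + textlen > size)
            break;

        memcpy(buf + len, records[i], textlen);
        len += textlen;
    }
    return len;
}

int main(void)
{
    const char *recs[] = { "first ", "second ", "third " };
    char buf[16];
    size_t n = fill_buffer(buf, sizeof(buf), recs, 3);

    printf("copied %zu bytes: %.*s\n", n, (int)n, buf);
    return 0;
}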
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1446,6 +1446,9 @@ static int syslog_print_all(char __user
|
||||
break;
|
||||
}
|
||||
|
||||
+ if (len + textlen > size)
|
||||
+ break;
|
||||
+
|
||||
if (copy_to_user(buf + len, text, textlen))
|
||||
len = -EFAULT;
|
||||
else
|
||||
@@ -3085,7 +3088,7 @@ bool kmsg_dump_get_buffer(struct kmsg_du
|
||||
ret = prb_iter_next(&iter, msgbuf, PRINTK_RECORD_MAX, &seq);
|
||||
if (ret == 0) {
|
||||
break;
|
||||
- } else if (ret < 0) {
|
||||
+ } else if (ret < 0 || seq >= end_seq) {
|
||||
prb_iter_init(&iter, &printk_rb, &seq);
|
||||
goto retry;
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Tue, 3 Dec 2019 09:14:57 +0100
Subject: [PATCH] printk: hack out emergency loglevel usage

Instead of using an emergency loglevel to determine if atomic
messages should be printed, use oops_in_progress. This conforms
to the decision that latency-causing atomic messages never be
generated during normal operation.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
|
||||
|
||||
--- a/kernel/printk/printk.c
|
||||
+++ b/kernel/printk/printk.c
|
||||
@@ -1777,15 +1777,8 @@ static void call_console_drivers(u64 seq
|
||||
con->wrote_history = 1;
|
||||
con->printk_seq = seq - 1;
|
||||
}
|
||||
- if (con->write_atomic && level < emergency_console_loglevel &&
|
||||
- facility == 0) {
|
||||
- /* skip emergency messages, already printed */
|
||||
- if (con->printk_seq < seq)
|
||||
- con->printk_seq = seq;
|
||||
- continue;
|
||||
- }
|
||||
if (con->flags & CON_BOOT && facility == 0) {
|
||||
- /* skip emergency messages, already printed */
|
||||
+ /* skip boot messages, already printed */
|
||||
if (con->printk_seq < seq)
|
||||
con->printk_seq = seq;
|
||||
continue;
|
||||
@@ -3171,7 +3164,7 @@ static bool console_can_emergency(int le
|
||||
for_each_console(con) {
|
||||
if (!(con->flags & CON_ENABLED))
|
||||
continue;
|
||||
- if (con->write_atomic && level < emergency_console_loglevel)
|
||||
+ if (con->write_atomic && oops_in_progress)
|
||||
return true;
|
||||
if (con->write && (con->flags & CON_BOOT))
|
||||
return true;
|
||||
@@ -3187,7 +3180,7 @@ static void call_emergency_console_drive
|
||||
for_each_console(con) {
|
||||
if (!(con->flags & CON_ENABLED))
|
||||
continue;
|
||||
- if (con->write_atomic && level < emergency_console_loglevel) {
|
||||
+ if (con->write_atomic && oops_in_progress) {
|
||||
con->write_atomic(con, text, text_len);
|
||||
continue;
|
||||
}
|
@ -0,0 +1,384 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
Date: Fri, 10 Jan 2020 16:45:31 +0106
Subject: [PATCH] serial: 8250: only atomic lock for console

The atomic console implementation requires that IER is synchronized
between atomic and non-atomic usage. However, it was implemented such
that the console_atomic_lock was performed for all IER access, even
if that port was not a console.

The implementation also used a usage counter to keep track of IER
clear/restore windows. However, this is not needed because the
console_atomic_lock synchronization of IER access will prevent any
situations where IER is prematurely restored or left cleared.

Move the IER access functions to inline macros. They will only take
console_atomic_lock if the port is a console. Remove the
restore_ier() function by having clear_ier() return the prior IER
value so that the caller can restore it using set_ier(). Rename the
IER access functions to match other 8250 wrapper macros.

Suggested-by: Dick Hollenbeck <dick@softplc.com>
Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/tty/serial/8250/8250.h | 65 +++++++++++++++++++---------
drivers/tty/serial/8250/8250_core.c | 6 +-
drivers/tty/serial/8250/8250_dma.c | 4 -
drivers/tty/serial/8250/8250_port.c | 81 ++++++++----------------------------
4 files changed, 66 insertions(+), 90 deletions(-)
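As an aside, a minimal sketch of the save/restore idiom this patch switches to (illustrative user-space C, not the 8250 driver): the clear helper hands the previous register value back to the caller, and the caller passes it to the set helper to restore it, which removes the need for a global usage counter.

#include <stdio.h>

static unsigned char hw_ier = 0x0f; /* stand-in for the UART IER register */

/* Disable interrupts and return the previous value to the caller. */
static unsigned char ier_clear(void)
{
    unsigned char prior = hw_ier;

    hw_ier = 0;
    return prior;
}

/* Restore whatever the caller saved; no counter needed. */
static void ier_set(unsigned char ier)
{
    hw_ier = ier;
}

int main(void)
{
    unsigned char saved = ier_clear();

    printf("during atomic write: 0x%02x\n", hw_ier);
    ier_set(saved);
    printf("after restore: 0x%02x\n", hw_ier);
    return 0;
}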
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250.h
|
||||
+++ b/drivers/tty/serial/8250/8250.h
|
||||
@@ -96,10 +96,6 @@ struct serial8250_config {
|
||||
#define SERIAL8250_SHARE_IRQS 0
|
||||
#endif
|
||||
|
||||
-void set_ier(struct uart_8250_port *up, unsigned char ier);
|
||||
-void clear_ier(struct uart_8250_port *up);
|
||||
-void restore_ier(struct uart_8250_port *up);
|
||||
-
|
||||
#define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \
|
||||
{ \
|
||||
.iobase = _base, \
|
||||
@@ -134,39 +130,64 @@ static inline void serial_dl_write(struc
|
||||
up->dl_write(up, value);
|
||||
}
|
||||
|
||||
-static inline bool serial8250_set_THRI(struct uart_8250_port *up)
|
||||
+static inline void serial8250_set_IER(struct uart_8250_port *up,
|
||||
+ unsigned char ier)
|
||||
{
|
||||
- if (up->ier & UART_IER_THRI)
|
||||
- return false;
|
||||
- up->ier |= UART_IER_THRI;
|
||||
- serial_out(up, UART_IER, up->ier);
|
||||
- return true;
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ serial_out(up, UART_IER, ier);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
}
|
||||
|
||||
-static inline bool serial8250_set_THRI_sier(struct uart_8250_port *up)
|
||||
+static inline unsigned char serial8250_clear_IER(struct uart_8250_port *up)
|
||||
{
|
||||
- if (up->ier & UART_IER_THRI)
|
||||
- return false;
|
||||
- up->ier |= UART_IER_THRI;
|
||||
- set_ier(up, up->ier);
|
||||
- return true;
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int clearval = 0;
|
||||
+ unsigned int prior;
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (up->capabilities & UART_CAP_UUE)
|
||||
+ clearval = UART_IER_UUE;
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ prior = serial_port_in(port, UART_IER);
|
||||
+ serial_port_out(port, UART_IER, clearval);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
+
|
||||
+ return prior;
|
||||
}
|
||||
|
||||
-static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
|
||||
+static inline bool serial8250_set_THRI(struct uart_8250_port *up)
|
||||
{
|
||||
- if (!(up->ier & UART_IER_THRI))
|
||||
+ if (up->ier & UART_IER_THRI)
|
||||
return false;
|
||||
- up->ier &= ~UART_IER_THRI;
|
||||
- serial_out(up, UART_IER, up->ier);
|
||||
+ up->ier |= UART_IER_THRI;
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
return true;
|
||||
}
|
||||
|
||||
-static inline bool serial8250_clear_THRI_sier(struct uart_8250_port *up)
|
||||
+static inline bool serial8250_clear_THRI(struct uart_8250_port *up)
|
||||
{
|
||||
if (!(up->ier & UART_IER_THRI))
|
||||
return false;
|
||||
up->ier &= ~UART_IER_THRI;
|
||||
- set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
return true;
|
||||
}
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250_core.c
|
||||
+++ b/drivers/tty/serial/8250/8250_core.c
|
||||
@@ -265,7 +265,7 @@ static void serial8250_timeout(struct ti
|
||||
static void serial8250_backup_timeout(struct timer_list *t)
|
||||
{
|
||||
struct uart_8250_port *up = from_timer(up, t, timer);
|
||||
- unsigned int iir, lsr;
|
||||
+ unsigned int iir, ier = 0, lsr;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&up->port.lock, flags);
|
||||
@@ -275,7 +275,7 @@ static void serial8250_backup_timeout(st
|
||||
* based handler.
|
||||
*/
|
||||
if (up->port.irq)
|
||||
- clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
iir = serial_in(up, UART_IIR);
|
||||
|
||||
@@ -298,7 +298,7 @@ static void serial8250_backup_timeout(st
|
||||
serial8250_tx_chars(up);
|
||||
|
||||
if (up->port.irq)
|
||||
- restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
|
||||
spin_unlock_irqrestore(&up->port.lock, flags);
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250_dma.c
|
||||
+++ b/drivers/tty/serial/8250/8250_dma.c
|
||||
@@ -35,7 +35,7 @@ static void __dma_tx_complete(void *para
|
||||
|
||||
ret = serial8250_tx_dma(p);
|
||||
if (ret)
|
||||
- serial8250_set_THRI_sier(p);
|
||||
+ serial8250_set_THRI(p);
|
||||
|
||||
spin_unlock_irqrestore(&p->port.lock, flags);
|
||||
}
|
||||
@@ -98,7 +98,7 @@ int serial8250_tx_dma(struct uart_8250_p
|
||||
dma_async_issue_pending(dma->txchan);
|
||||
if (dma->tx_err) {
|
||||
dma->tx_err = 0;
|
||||
- serial8250_clear_THRI_sier(p);
|
||||
+ serial8250_clear_THRI(p);
|
||||
}
|
||||
return 0;
|
||||
err:
|
||||
--- a/drivers/tty/serial/8250/8250_port.c
|
||||
+++ b/drivers/tty/serial/8250/8250_port.c
|
||||
@@ -721,7 +721,7 @@ static void serial8250_set_sleep(struct
|
||||
serial_out(p, UART_EFR, UART_EFR_ECB);
|
||||
serial_out(p, UART_LCR, 0);
|
||||
}
|
||||
- set_ier(p, sleep ? UART_IERX_SLEEP : 0);
|
||||
+ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0);
|
||||
if (p->capabilities & UART_CAP_EFR) {
|
||||
serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
|
||||
serial_out(p, UART_EFR, efr);
|
||||
@@ -1390,7 +1390,7 @@ static void serial8250_stop_rx(struct ua
|
||||
|
||||
up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
|
||||
up->port.read_status_mask &= ~UART_LSR_DR;
|
||||
- set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
@@ -1408,7 +1408,7 @@ static void __do_stop_tx_rs485(struct ua
|
||||
serial8250_clear_and_reinit_fifos(p);
|
||||
|
||||
p->ier |= UART_IER_RLSI | UART_IER_RDI;
|
||||
- set_ier(p, p->ier);
|
||||
+ serial8250_set_IER(p, p->ier);
|
||||
}
|
||||
}
|
||||
static enum hrtimer_restart serial8250_em485_handle_stop_tx(struct hrtimer *t)
|
||||
@@ -1459,7 +1459,7 @@ static void __stop_tx_rs485(struct uart_
|
||||
|
||||
static inline void __do_stop_tx(struct uart_8250_port *p)
|
||||
{
|
||||
- if (serial8250_clear_THRI_sier(p))
|
||||
+ if (serial8250_clear_THRI(p))
|
||||
serial8250_rpm_put_tx(p);
|
||||
}
|
||||
|
||||
@@ -1509,7 +1509,7 @@ static inline void __start_tx(struct uar
|
||||
if (up->dma && !up->dma->tx_dma(up))
|
||||
return;
|
||||
|
||||
- if (serial8250_set_THRI_sier(up)) {
|
||||
+ if (serial8250_set_THRI(up)) {
|
||||
if (up->bugs & UART_BUG_TXEN) {
|
||||
unsigned char lsr;
|
||||
|
||||
@@ -1616,7 +1616,7 @@ static void serial8250_disable_ms(struct
|
||||
mctrl_gpio_disable_ms(up->gpios);
|
||||
|
||||
up->ier &= ~UART_IER_MSI;
|
||||
- set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
}
|
||||
|
||||
static void serial8250_enable_ms(struct uart_port *port)
|
||||
@@ -1632,7 +1632,7 @@ static void serial8250_enable_ms(struct
|
||||
up->ier |= UART_IER_MSI;
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
- set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -1991,54 +1991,6 @@ static void wait_for_xmitr(struct uart_8
|
||||
}
|
||||
}
|
||||
|
||||
-static atomic_t ier_counter = ATOMIC_INIT(0);
|
||||
-static atomic_t ier_value = ATOMIC_INIT(0);
|
||||
-
|
||||
-void set_ier(struct uart_8250_port *up, unsigned char ier)
|
||||
-{
|
||||
- struct uart_port *port = &up->port;
|
||||
- unsigned int flags;
|
||||
-
|
||||
- console_atomic_lock(&flags);
|
||||
- if (atomic_read(&ier_counter) > 0)
|
||||
- atomic_set(&ier_value, ier);
|
||||
- else
|
||||
- serial_port_out(port, UART_IER, ier);
|
||||
- console_atomic_unlock(flags);
|
||||
-}
|
||||
-
|
||||
-void clear_ier(struct uart_8250_port *up)
|
||||
-{
|
||||
- struct uart_port *port = &up->port;
|
||||
- unsigned int ier_cleared = 0;
|
||||
- unsigned int flags;
|
||||
- unsigned int ier;
|
||||
-
|
||||
- console_atomic_lock(&flags);
|
||||
- atomic_inc(&ier_counter);
|
||||
- ier = serial_port_in(port, UART_IER);
|
||||
- if (up->capabilities & UART_CAP_UUE)
|
||||
- ier_cleared = UART_IER_UUE;
|
||||
- if (ier != ier_cleared) {
|
||||
- serial_port_out(port, UART_IER, ier_cleared);
|
||||
- atomic_set(&ier_value, ier);
|
||||
- }
|
||||
- console_atomic_unlock(flags);
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(clear_ier);
|
||||
-
|
||||
-void restore_ier(struct uart_8250_port *up)
|
||||
-{
|
||||
- struct uart_port *port = &up->port;
|
||||
- unsigned int flags;
|
||||
-
|
||||
- console_atomic_lock(&flags);
|
||||
- if (atomic_fetch_dec(&ier_counter) == 1)
|
||||
- serial_port_out(port, UART_IER, atomic_read(&ier_value));
|
||||
- console_atomic_unlock(flags);
|
||||
-}
|
||||
-EXPORT_SYMBOL_GPL(restore_ier);
|
||||
-
|
||||
#ifdef CONFIG_CONSOLE_POLL
|
||||
/*
|
||||
* Console polling routines for writing and reading from the uart while
|
||||
@@ -2070,10 +2022,11 @@ static int serial8250_get_poll_char(stru
|
||||
static void serial8250_put_poll_char(struct uart_port *port,
|
||||
unsigned char c)
|
||||
{
|
||||
+ unsigned int ier;
|
||||
struct uart_8250_port *up = up_to_u8250p(port);
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
- clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
/*
|
||||
@@ -2086,7 +2039,7 @@ static void serial8250_put_poll_char(str
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
serial8250_rpm_put(up);
|
||||
}
|
||||
|
||||
@@ -2398,7 +2351,7 @@ void serial8250_do_shutdown(struct uart_
|
||||
*/
|
||||
spin_lock_irqsave(&port->lock, flags);
|
||||
up->ier = 0;
|
||||
- set_ier(up, 0);
|
||||
+ serial8250_set_IER(up, 0);
|
||||
spin_unlock_irqrestore(&port->lock, flags);
|
||||
|
||||
synchronize_irq(port->irq);
|
||||
@@ -2683,7 +2636,7 @@ serial8250_do_set_termios(struct uart_po
|
||||
if (up->capabilities & UART_CAP_RTOIE)
|
||||
up->ier |= UART_IER_RTOIE;
|
||||
|
||||
- set_ier(up, up->ier);
|
||||
+ serial8250_set_IER(up, up->ier);
|
||||
|
||||
if (up->capabilities & UART_CAP_EFR) {
|
||||
unsigned char efr = 0;
|
||||
@@ -3193,12 +3146,13 @@ void serial8250_console_write_atomic(str
|
||||
{
|
||||
struct uart_port *port = &up->port;
|
||||
unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
|
||||
console_atomic_lock(&flags);
|
||||
|
||||
touch_nmi_watchdog();
|
||||
|
||||
- clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
if (atomic_fetch_inc(&up->console_printing)) {
|
||||
uart_console_write(port, "\n", 1,
|
||||
@@ -3208,7 +3162,7 @@ void serial8250_console_write_atomic(str
|
||||
atomic_dec(&up->console_printing);
|
||||
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
|
||||
console_atomic_unlock(flags);
|
||||
}
|
||||
@@ -3224,13 +3178,14 @@ void serial8250_console_write(struct uar
|
||||
{
|
||||
struct uart_port *port = &up->port;
|
||||
unsigned long flags;
|
||||
+ unsigned int ier;
|
||||
|
||||
touch_nmi_watchdog();
|
||||
|
||||
serial8250_rpm_get(up);
|
||||
spin_lock_irqsave(&port->lock, flags);
|
||||
|
||||
- clear_ier(up);
|
||||
+ ier = serial8250_clear_IER(up);
|
||||
|
||||
/* check scratch reg to see if port powered off during system sleep */
|
||||
if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) {
|
||||
@@ -3247,7 +3202,7 @@ void serial8250_console_write(struct uar
|
||||
* and restore the IER
|
||||
*/
|
||||
wait_for_xmitr(up, BOTH_EMPTY);
|
||||
- restore_ier(up);
|
||||
+ serial8250_set_IER(up, ier);
|
||||
|
||||
/*
|
||||
* The receive handling will happen properly because the
|
@ -0,0 +1,102 @@
|
||||
From: John Ogness <john.ogness@linutronix.de>
|
||||
Date: Fri, 10 Jan 2020 16:45:32 +0106
|
||||
Subject: [PATCH] serial: 8250: fsl/ingenic/mtk: fix atomic console
|
||||
|
||||
A few 8250 implementations have their own IER access. If the port
|
||||
is a console, wrap the accesses with console_atomic_lock.
|
||||
|
||||
Signed-off-by: John Ogness <john.ogness@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/tty/serial/8250/8250_fsl.c | 9 +++++++++
|
||||
drivers/tty/serial/8250/8250_ingenic.c | 7 +++++++
|
||||
drivers/tty/serial/8250/8250_mtk.c | 29 +++++++++++++++++++++++++++--
|
||||
3 files changed, 43 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/drivers/tty/serial/8250/8250_fsl.c
|
||||
+++ b/drivers/tty/serial/8250/8250_fsl.c
|
||||
@@ -57,9 +57,18 @@ int fsl8250_handle_irq(struct uart_port
|
||||
|
||||
/* Stop processing interrupts on input overrun */
|
||||
if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) {
|
||||
+ unsigned int ca_flags;
|
||||
unsigned long delay;
|
||||
+ bool is_console;
|
||||
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&ca_flags);
|
||||
up->ier = port->serial_in(port, UART_IER);
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(ca_flags);
|
||||
+
|
||||
if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) {
|
||||
port->ops->stop_rx(port);
|
||||
} else {
|
||||
--- a/drivers/tty/serial/8250/8250_ingenic.c
|
||||
+++ b/drivers/tty/serial/8250/8250_ingenic.c
|
||||
@@ -146,6 +146,8 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic
|
||||
|
||||
static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value)
|
||||
{
|
||||
+ unsigned int flags;
|
||||
+ bool is_console;
|
||||
int ier;
|
||||
|
||||
switch (offset) {
|
||||
@@ -167,7 +169,12 @@ static void ingenic_uart_serial_out(stru
|
||||
* If we have enabled modem status IRQs we should enable
|
||||
* modem mode.
|
||||
*/
|
||||
+ is_console = uart_console(p);
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
ier = p->serial_in(p, UART_IER);
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
|
||||
if (ier & UART_IER_MSI)
|
||||
value |= UART_MCR_MDCE | UART_MCR_FCM;
|
||||
--- a/drivers/tty/serial/8250/8250_mtk.c
|
||||
+++ b/drivers/tty/serial/8250/8250_mtk.c
|
||||
@@ -212,12 +212,37 @@ static void mtk8250_shutdown(struct uart
|
||||
|
||||
static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask)
|
||||
{
|
||||
- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask));
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+ bool is_console;
|
||||
+
|
||||
+ is_console = uart_console(port);
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ ier = serial_in(up, UART_IER);
|
||||
+ serial_out(up, UART_IER, ier & (~mask));
|
||||
+
|
||||
+ if (is_console)
|
||||
+ console_atomic_unlock(flags);
|
||||
}
|
||||
|
||||
static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask)
|
||||
{
|
||||
- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask);
|
||||
+ struct uart_port *port = &up->port;
|
||||
+ unsigned int flags;
|
||||
+ unsigned int ier;
|
||||
+
|
||||
+ if (uart_console(port))
|
||||
+ console_atomic_lock(&flags);
|
||||
+
|
||||
+ ier = serial_in(up, UART_IER);
|
||||
+ serial_out(up, UART_IER, ier | mask);
|
||||
+
|
||||
+ if (uart_console(port))
|
||||
+ console_atomic_unlock(flags);
|
||||
}
|
||||
|
||||
static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode)
|
@ -0,0 +1,217 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 31 Jan 2020 16:07:04 +0100
|
||||
Subject: [PATCH 1/7] locking/percpu-rwsem, lockdep: Make percpu-rwsem use its
|
||||
own lockdep_map
|
||||
|
||||
As preparation for replacing the embedded rwsem, give percpu-rwsem its
|
||||
own lockdep_map.
|
||||
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Tested-by: Juri Lelli <juri.lelli@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/percpu-rwsem.h | 29 +++++++++++++++++++----------
|
||||
kernel/cpu.c | 4 ++--
|
||||
kernel/locking/percpu-rwsem.c | 16 ++++++++++++----
|
||||
kernel/locking/rwsem.c | 4 ++--
|
||||
kernel/locking/rwsem.h | 2 ++
|
||||
5 files changed, 37 insertions(+), 18 deletions(-)
|
||||
|
||||
--- a/include/linux/percpu-rwsem.h
|
||||
+++ b/include/linux/percpu-rwsem.h
|
||||
@@ -15,8 +15,17 @@ struct percpu_rw_semaphore {
|
||||
struct rw_semaphore rw_sem; /* slowpath */
|
||||
struct rcuwait writer; /* blocked writer */
|
||||
int readers_block;
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+ struct lockdep_map dep_map;
|
||||
+#endif
|
||||
};
|
||||
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname },
|
||||
+#else
|
||||
+#define __PERCPU_RWSEM_DEP_MAP_INIT(lockname)
|
||||
+#endif
|
||||
+
|
||||
#define __DEFINE_PERCPU_RWSEM(name, is_static) \
|
||||
static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name); \
|
||||
is_static struct percpu_rw_semaphore name = { \
|
||||
@@ -24,7 +33,9 @@ is_static struct percpu_rw_semaphore nam
|
||||
.read_count = &__percpu_rwsem_rc_##name, \
|
||||
.rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
|
||||
.writer = __RCUWAIT_INITIALIZER(name.writer), \
|
||||
+ __PERCPU_RWSEM_DEP_MAP_INIT(name) \
|
||||
}
|
||||
+
|
||||
#define DEFINE_PERCPU_RWSEM(name) \
|
||||
__DEFINE_PERCPU_RWSEM(name, /* not static */)
|
||||
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
|
||||
@@ -37,7 +48,7 @@ static inline void percpu_down_read(stru
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
|
||||
+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
preempt_disable();
|
||||
/*
|
||||
@@ -76,13 +87,15 @@ static inline int percpu_down_read_trylo
|
||||
*/
|
||||
|
||||
if (ret)
|
||||
- rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
|
||||
+ rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
+ rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
+
|
||||
preempt_disable();
|
||||
/*
|
||||
* Same as in percpu_down_read().
|
||||
@@ -92,8 +105,6 @@ static inline void percpu_up_read(struct
|
||||
else
|
||||
__percpu_up_read(sem); /* Unconditional memory barrier */
|
||||
preempt_enable();
|
||||
-
|
||||
- rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
|
||||
}
|
||||
|
||||
extern void percpu_down_write(struct percpu_rw_semaphore *);
|
||||
@@ -110,15 +121,13 @@ extern void percpu_free_rwsem(struct per
|
||||
__percpu_init_rwsem(sem, #sem, &rwsem_key); \
|
||||
})
|
||||
|
||||
-#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
|
||||
-
|
||||
-#define percpu_rwsem_assert_held(sem) \
|
||||
- lockdep_assert_held(&(sem)->rw_sem)
|
||||
+#define percpu_rwsem_is_held(sem) lockdep_is_held(sem)
|
||||
+#define percpu_rwsem_assert_held(sem) lockdep_assert_held(sem)
|
||||
|
||||
static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
|
||||
bool read, unsigned long ip)
|
||||
{
|
||||
- lock_release(&sem->rw_sem.dep_map, 1, ip);
|
||||
+ lock_release(&sem->dep_map, 1, ip);
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
if (!read)
|
||||
atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
|
||||
@@ -128,7 +137,7 @@ static inline void percpu_rwsem_release(
|
||||
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
|
||||
bool read, unsigned long ip)
|
||||
{
|
||||
- lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip);
|
||||
+ lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
|
||||
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
if (!read)
|
||||
atomic_long_set(&sem->rw_sem.owner, (long)current);
|
||||
--- a/kernel/cpu.c
|
||||
+++ b/kernel/cpu.c
|
||||
@@ -331,12 +331,12 @@ void lockdep_assert_cpus_held(void)
|
||||
|
||||
static void lockdep_acquire_cpus_lock(void)
|
||||
{
|
||||
- rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_);
|
||||
+ rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
|
||||
}
|
||||
|
||||
static void lockdep_release_cpus_lock(void)
|
||||
{
|
||||
- rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_);
|
||||
+ rwsem_release(&cpu_hotplug_lock.dep_map, 1, _THIS_IP_);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/kernel/locking/percpu-rwsem.c
|
||||
+++ b/kernel/locking/percpu-rwsem.c
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "rwsem.h"
|
||||
|
||||
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
|
||||
- const char *name, struct lock_class_key *rwsem_key)
|
||||
+ const char *name, struct lock_class_key *key)
|
||||
{
|
||||
sem->read_count = alloc_percpu(int);
|
||||
if (unlikely(!sem->read_count))
|
||||
@@ -19,9 +19,13 @@ int __percpu_init_rwsem(struct percpu_rw
|
||||
|
||||
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
|
||||
rcu_sync_init(&sem->rss);
|
||||
- __init_rwsem(&sem->rw_sem, name, rwsem_key);
|
||||
+ init_rwsem(&sem->rw_sem);
|
||||
rcuwait_init(&sem->writer);
|
||||
sem->readers_block = 0;
|
||||
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
|
||||
+ lockdep_init_map(&sem->dep_map, name, key, 0);
|
||||
+#endif
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
|
||||
@@ -142,10 +146,12 @@ static bool readers_active_check(struct
|
||||
|
||||
void percpu_down_write(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
+
|
||||
/* Notify readers to take the slow path. */
|
||||
rcu_sync_enter(&sem->rss);
|
||||
|
||||
- down_write(&sem->rw_sem);
|
||||
+ __down_write(&sem->rw_sem);
|
||||
|
||||
/*
|
||||
* Notify new readers to block; up until now, and thus throughout the
|
||||
@@ -168,6 +174,8 @@ EXPORT_SYMBOL_GPL(percpu_down_write);
|
||||
|
||||
void percpu_up_write(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
+ rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
+
|
||||
/*
|
||||
* Signal the writer is done, no fast path yet.
|
||||
*
|
||||
@@ -183,7 +191,7 @@ void percpu_up_write(struct percpu_rw_se
|
||||
/*
|
||||
* Release the write lock, this will allow readers back in the game.
|
||||
*/
|
||||
- up_write(&sem->rw_sem);
|
||||
+ __up_write(&sem->rw_sem);
|
||||
|
||||
/*
|
||||
* Once this completes (at least one RCU-sched grace period hence) the
|
||||
--- a/kernel/locking/rwsem.c
|
||||
+++ b/kernel/locking/rwsem.c
|
||||
@@ -1383,7 +1383,7 @@ static inline int __down_read_trylock(st
|
||||
/*
|
||||
* lock for writing
|
||||
*/
|
||||
-static inline void __down_write(struct rw_semaphore *sem)
|
||||
+inline void __down_write(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp = RWSEM_UNLOCKED_VALUE;
|
||||
|
||||
@@ -1446,7 +1446,7 @@ inline void __up_read(struct rw_semaphor
|
||||
/*
|
||||
* unlock after writing
|
||||
*/
|
||||
-static inline void __up_write(struct rw_semaphore *sem)
|
||||
+inline void __up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp;
|
||||
|
||||
--- a/kernel/locking/rwsem.h
|
||||
+++ b/kernel/locking/rwsem.h
|
||||
@@ -6,5 +6,7 @@
|
||||
|
||||
extern void __down_read(struct rw_semaphore *sem);
|
||||
extern void __up_read(struct rw_semaphore *sem);
|
||||
+extern void __down_write(struct rw_semaphore *sem);
|
||||
+extern void __up_write(struct rw_semaphore *sem);
|
||||
|
||||
#endif /* __INTERNAL_RWSEM_H */
|
@ -0,0 +1,75 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 31 Jan 2020 16:07:05 +0100
|
||||
Subject: [PATCH 2/7] locking/percpu-rwsem: Convert to bool
|
||||
|
||||
Use bool where possible.
|
||||
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Tested-by: Juri Lelli <juri.lelli@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/percpu-rwsem.h | 6 +++---
|
||||
kernel/locking/percpu-rwsem.c | 8 ++++----
|
||||
2 files changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
--- a/include/linux/percpu-rwsem.h
|
||||
+++ b/include/linux/percpu-rwsem.h
|
||||
@@ -41,7 +41,7 @@ is_static struct percpu_rw_semaphore nam
|
||||
#define DEFINE_STATIC_PERCPU_RWSEM(name) \
|
||||
__DEFINE_PERCPU_RWSEM(name, static)
|
||||
|
||||
-extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
|
||||
+extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
|
||||
extern void __percpu_up_read(struct percpu_rw_semaphore *);
|
||||
|
||||
static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
|
||||
@@ -69,9 +69,9 @@ static inline void percpu_down_read(stru
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
-static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
|
||||
+static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
- int ret = 1;
|
||||
+ bool ret = true;
|
||||
|
||||
preempt_disable();
|
||||
/*
|
||||
--- a/kernel/locking/percpu-rwsem.c
|
||||
+++ b/kernel/locking/percpu-rwsem.c
|
||||
@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
|
||||
|
||||
-int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
|
||||
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
|
||||
{
|
||||
/*
|
||||
* Due to having preemption disabled the decrement happens on
|
||||
@@ -69,7 +69,7 @@ int __percpu_down_read(struct percpu_rw_
|
||||
* release in percpu_up_write().
|
||||
*/
|
||||
if (likely(!smp_load_acquire(&sem->readers_block)))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
/*
|
||||
* Per the above comment; we still have preemption disabled and
|
||||
@@ -78,7 +78,7 @@ int __percpu_down_read(struct percpu_rw_
|
||||
__percpu_up_read(sem);
|
||||
|
||||
if (try)
|
||||
- return 0;
|
||||
+ return false;
|
||||
|
||||
/*
|
||||
* We either call schedule() in the wait, or we'll fall through
|
||||
@@ -94,7 +94,7 @@ int __percpu_down_read(struct percpu_rw_
|
||||
__up_read(&sem->rw_sem);
|
||||
|
||||
preempt_disable();
|
||||
- return 1;
|
||||
+ return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__percpu_down_read);
|
||||
|
@ -0,0 +1,53 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 31 Jan 2020 16:07:06 +0100
|
||||
Subject: [PATCH 3/7] locking/percpu-rwsem: Move __this_cpu_inc() into the
|
||||
slowpath
|
||||
|
||||
As preparation to rework __percpu_down_read() move the
|
||||
__this_cpu_inc() into it.
|
||||
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Tested-by: Juri Lelli <juri.lelli@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/percpu-rwsem.h | 10 ++++++----
|
||||
kernel/locking/percpu-rwsem.c | 2 ++
|
||||
2 files changed, 8 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/include/linux/percpu-rwsem.h
|
||||
+++ b/include/linux/percpu-rwsem.h
|
||||
@@ -59,8 +59,9 @@ static inline void percpu_down_read(stru
|
||||
* and that once the synchronize_rcu() is done, the writer will see
|
||||
* anything we did within this RCU-sched read-size critical section.
|
||||
*/
|
||||
- __this_cpu_inc(*sem->read_count);
|
||||
- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
|
||||
+ if (likely(rcu_sync_is_idle(&sem->rss)))
|
||||
+ __this_cpu_inc(*sem->read_count);
|
||||
+ else
|
||||
__percpu_down_read(sem, false); /* Unconditional memory barrier */
|
||||
/*
|
||||
* The preempt_enable() prevents the compiler from
|
||||
@@ -77,8 +78,9 @@ static inline bool percpu_down_read_tryl
|
||||
/*
|
||||
* Same as in percpu_down_read().
|
||||
*/
|
||||
- __this_cpu_inc(*sem->read_count);
|
||||
- if (unlikely(!rcu_sync_is_idle(&sem->rss)))
|
||||
+ if (likely(rcu_sync_is_idle(&sem->rss)))
|
||||
+ __this_cpu_inc(*sem->read_count);
|
||||
+ else
|
||||
ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
|
||||
preempt_enable();
|
||||
/*
|
||||
--- a/kernel/locking/percpu-rwsem.c
|
||||
+++ b/kernel/locking/percpu-rwsem.c
|
||||
@@ -47,6 +47,8 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem);
|
||||
|
||||
bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
|
||||
{
|
||||
+ __this_cpu_inc(*sem->read_count);
|
||||
+
|
||||
/*
|
||||
* Due to having preemption disabled the decrement happens on
|
||||
* the same CPU as the increment, avoiding the
|
@ -0,0 +1,50 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 31 Jan 2020 16:07:07 +0100
|
||||
Subject: [PATCH 4/7] locking/percpu-rwsem: Extract
|
||||
__percpu_down_read_trylock()
|
||||
|
||||
In preparation for removing the embedded rwsem and building a custom
|
||||
lock, extract the read-trylock primitive.
|
||||
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Tested-by: Juri Lelli <juri.lelli@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/locking/percpu-rwsem.c | 19 +++++++++++++------
|
||||
1 file changed, 13 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/kernel/locking/percpu-rwsem.c
|
||||
+++ b/kernel/locking/percpu-rwsem.c
|
||||
@@ -45,7 +45,7 @@ void percpu_free_rwsem(struct percpu_rw_
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(percpu_free_rwsem);
|
||||
|
||||
-bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
|
||||
+static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
__this_cpu_inc(*sem->read_count);
|
||||
|
||||
@@ -73,11 +73,18 @@ bool __percpu_down_read(struct percpu_rw
|
||||
if (likely(!smp_load_acquire(&sem->readers_block)))
|
||||
return true;
|
||||
|
||||
- /*
|
||||
- * Per the above comment; we still have preemption disabled and
|
||||
- * will thus decrement on the same CPU as we incremented.
|
||||
- */
|
||||
- __percpu_up_read(sem);
|
||||
+ __this_cpu_dec(*sem->read_count);
|
||||
+
|
||||
+ /* Prod writer to re-evaluate readers_active_check() */
|
||||
+ rcuwait_wake_up(&sem->writer);
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
|
||||
+{
|
||||
+ if (__percpu_down_read_trylock(sem))
|
||||
+ return true;
|
||||
|
||||
if (try)
|
||||
return false;
|
@ -0,0 +1,433 @@
|
||||
From: Peter Zijlstra <peterz@infradead.org>
|
||||
Date: Fri, 31 Jan 2020 16:07:08 +0100
|
||||
Subject: [PATCH 5/7] locking/percpu-rwsem: Remove the embedded rwsem
|
||||
|
||||
The filesystem freezer uses percpu-rwsem in a way that is effectively
|
||||
write_non_owner() and achieves this with a few horrible hacks that
|
||||
rely on the rwsem (!percpu) implementation.
|
||||
|
||||
When PREEMPT_RT replaces the rwsem implementation with a PI aware
|
||||
variant this comes apart.
|
||||
|
||||
Remove the embedded rwsem and implement it using a waitqueue and an
|
||||
atomic_t.
|
||||
|
||||
- make readers_block an atomic, and use it, with the waitqueue
|
||||
for a blocking test-and-set write-side.
|
||||
|
||||
- have the read-side wait for the 'lock' state to clear.
|
||||
|
||||
Have the waiters use FIFO queueing and mark them (reader/writer) with
|
||||
a new WQ_FLAG. Use a custom wake_function to wake either a single
|
||||
writer or all readers until a writer.
|
||||
|
||||
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
|
||||
Tested-by: Juri Lelli <juri.lelli@redhat.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/percpu-rwsem.h | 19 +----
|
||||
include/linux/rwsem.h | 6 -
|
||||
include/linux/wait.h | 1
|
||||
kernel/locking/percpu-rwsem.c | 153 ++++++++++++++++++++++++++++++------------
|
||||
kernel/locking/rwsem.c | 11 +--
|
||||
kernel/locking/rwsem.h | 12 ---
|
||||
6 files changed, 123 insertions(+), 79 deletions(-)
|
||||
|
||||
--- a/include/linux/percpu-rwsem.h
|
||||
+++ b/include/linux/percpu-rwsem.h
|
||||
@@ -3,18 +3,18 @@
|
||||
#define _LINUX_PERCPU_RWSEM_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
-#include <linux/rwsem.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/rcuwait.h>
|
||||
+#include <linux/wait.h>
|
||||
#include <linux/rcu_sync.h>
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
struct percpu_rw_semaphore {
|
||||
struct rcu_sync rss;
|
||||
unsigned int __percpu *read_count;
|
||||
- struct rw_semaphore rw_sem; /* slowpath */
|
||||
- struct rcuwait writer; /* blocked writer */
|
||||
- int readers_block;
|
||||
+ struct rcuwait writer;
|
||||
+ wait_queue_head_t waiters;
|
||||
+ atomic_t block;
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
struct lockdep_map dep_map;
|
||||
#endif
|
||||
@@ -31,8 +31,9 @@ static DEFINE_PER_CPU(unsigned int, __pe
|
||||
is_static struct percpu_rw_semaphore name = { \
|
||||
.rss = __RCU_SYNC_INITIALIZER(name.rss), \
|
||||
.read_count = &__percpu_rwsem_rc_##name, \
|
||||
- .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \
|
||||
.writer = __RCUWAIT_INITIALIZER(name.writer), \
|
||||
+ .waiters = __WAIT_QUEUE_HEAD_INITIALIZER(name.waiters), \
|
||||
+ .block = ATOMIC_INIT(0), \
|
||||
__PERCPU_RWSEM_DEP_MAP_INIT(name) \
|
||||
}
|
||||
|
||||
@@ -130,20 +131,12 @@ static inline void percpu_rwsem_release(
|
||||
bool read, unsigned long ip)
|
||||
{
|
||||
lock_release(&sem->dep_map, 1, ip);
|
||||
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
- if (!read)
|
||||
- atomic_long_set(&sem->rw_sem.owner, RWSEM_OWNER_UNKNOWN);
|
||||
-#endif
|
||||
}
|
||||
|
||||
static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem,
|
||||
bool read, unsigned long ip)
|
||||
{
|
||||
lock_acquire(&sem->dep_map, 0, 1, read, 1, NULL, ip);
|
||||
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
|
||||
- if (!read)
|
||||
- atomic_long_set(&sem->rw_sem.owner, (long)current);
|
||||
-#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
--- a/include/linux/rwsem.h
|
||||
+++ b/include/linux/rwsem.h
|
||||
@@ -53,12 +53,6 @@ struct rw_semaphore {
|
||||
#endif
|
||||
};
|
||||
|
||||
-/*
|
||||
- * Setting all bits of the owner field except bit 0 will indicate
|
||||
- * that the rwsem is writer-owned with an unknown owner.
|
||||
- */
|
||||
-#define RWSEM_OWNER_UNKNOWN (-2L)
|
||||
-
|
||||
/* In all implementations count != 0 means locked */
|
||||
static inline int rwsem_is_locked(struct rw_semaphore *sem)
|
||||
{
|
||||
--- a/include/linux/wait.h
|
||||
+++ b/include/linux/wait.h
|
||||
@@ -20,6 +20,7 @@ int default_wake_function(struct wait_qu
|
||||
#define WQ_FLAG_EXCLUSIVE 0x01
|
||||
#define WQ_FLAG_WOKEN 0x02
|
||||
#define WQ_FLAG_BOOKMARK 0x04
|
||||
+#define WQ_FLAG_CUSTOM 0x08
|
||||
|
||||
/*
|
||||
* A single wait-queue entry structure:
|
||||
--- a/kernel/locking/percpu-rwsem.c
|
||||
+++ b/kernel/locking/percpu-rwsem.c
|
||||
@@ -1,15 +1,14 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#include <linux/atomic.h>
|
||||
-#include <linux/rwsem.h>
|
||||
#include <linux/percpu.h>
|
||||
+#include <linux/wait.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/percpu-rwsem.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/sched.h>
|
||||
+#include <linux/sched/task.h>
|
||||
#include <linux/errno.h>
|
||||
|
||||
-#include "rwsem.h"
|
||||
-
|
||||
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
|
||||
const char *name, struct lock_class_key *key)
|
||||
{
|
||||
@@ -17,11 +16,10 @@ int __percpu_init_rwsem(struct percpu_rw
|
||||
if (unlikely(!sem->read_count))
|
||||
return -ENOMEM;
|
||||
|
||||
- /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
|
||||
rcu_sync_init(&sem->rss);
|
||||
- init_rwsem(&sem->rw_sem);
|
||||
rcuwait_init(&sem->writer);
|
||||
- sem->readers_block = 0;
|
||||
+ init_waitqueue_head(&sem->waiters);
|
||||
+ atomic_set(&sem->block, 0);
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
|
||||
lockdep_init_map(&sem->dep_map, name, key, 0);
|
||||
@@ -54,23 +52,23 @@ static bool __percpu_down_read_trylock(s
|
||||
* the same CPU as the increment, avoiding the
|
||||
* increment-on-one-CPU-and-decrement-on-another problem.
|
||||
*
|
||||
- * If the reader misses the writer's assignment of readers_block, then
|
||||
- * the writer is guaranteed to see the reader's increment.
|
||||
+ * If the reader misses the writer's assignment of sem->block, then the
|
||||
+ * writer is guaranteed to see the reader's increment.
|
||||
*
|
||||
* Conversely, any readers that increment their sem->read_count after
|
||||
- * the writer looks are guaranteed to see the readers_block value,
|
||||
- * which in turn means that they are guaranteed to immediately
|
||||
- * decrement their sem->read_count, so that it doesn't matter that the
|
||||
- * writer missed them.
|
||||
+ * the writer looks are guaranteed to see the sem->block value, which
|
||||
+ * in turn means that they are guaranteed to immediately decrement
|
||||
+ * their sem->read_count, so that it doesn't matter that the writer
|
||||
+ * missed them.
|
||||
*/
|
||||
|
||||
smp_mb(); /* A matches D */
|
||||
|
||||
/*
|
||||
- * If !readers_block the critical section starts here, matched by the
|
||||
+ * If !sem->block the critical section starts here, matched by the
|
||||
* release in percpu_up_write().
|
||||
*/
|
||||
- if (likely(!smp_load_acquire(&sem->readers_block)))
|
||||
+ if (likely(!atomic_read_acquire(&sem->block)))
|
||||
return true;
|
||||
|
||||
__this_cpu_dec(*sem->read_count);
|
||||
@@ -81,6 +79,88 @@ static bool __percpu_down_read_trylock(s
|
||||
return false;
|
||||
}
|
||||
|
||||
+static inline bool __percpu_down_write_trylock(struct percpu_rw_semaphore *sem)
|
||||
+{
|
||||
+ if (atomic_read(&sem->block))
|
||||
+ return false;
|
||||
+
|
||||
+ return atomic_xchg(&sem->block, 1) == 0;
|
||||
+}
|
||||
+
|
||||
+static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
|
||||
+{
|
||||
+ if (reader) {
|
||||
+ bool ret;
|
||||
+
|
||||
+ preempt_disable();
|
||||
+ ret = __percpu_down_read_trylock(sem);
|
||||
+ preempt_enable();
|
||||
+
|
||||
+ return ret;
|
||||
+ }
|
||||
+ return __percpu_down_write_trylock(sem);
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+ * The return value of wait_queue_entry::func means:
|
||||
+ *
|
||||
+ * <0 - error, wakeup is terminated and the error is returned
|
||||
+ * 0 - no wakeup, a next waiter is tried
|
||||
+ * >0 - woken, if EXCLUSIVE, counted towards @nr_exclusive.
|
||||
+ *
|
||||
+ * We use EXCLUSIVE for both readers and writers to preserve FIFO order,
|
||||
+ * and play games with the return value to allow waking multiple readers.
|
||||
+ *
|
||||
+ * Specifically, we wake readers until we've woken a single writer, or until a
|
||||
+ * trylock fails.
|
||||
+ */
|
||||
+static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
|
||||
+ unsigned int mode, int wake_flags,
|
||||
+ void *key)
|
||||
+{
|
||||
+ struct task_struct *p = get_task_struct(wq_entry->private);
|
||||
+ bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
|
||||
+ struct percpu_rw_semaphore *sem = key;
|
||||
+
|
||||
+ /* concurrent against percpu_down_write(), can get stolen */
|
||||
+ if (!__percpu_rwsem_trylock(sem, reader))
|
||||
+ return 1;
|
||||
+
|
||||
+ list_del_init(&wq_entry->entry);
|
||||
+ smp_store_release(&wq_entry->private, NULL);
|
||||
+
|
||||
+ wake_up_process(p);
|
||||
+ put_task_struct(p);
|
||||
+
|
||||
+ return !reader; /* wake (readers until) 1 writer */
|
||||
+}
|
||||
+
|
||||
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
|
||||
+{
|
||||
+ DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
|
||||
+ bool wait;
|
||||
+
|
||||
+ spin_lock_irq(&sem->waiters.lock);
|
||||
+ /*
|
||||
+ * Serialize against the wakeup in percpu_up_write(), if we fail
|
||||
+ * the trylock, the wakeup must see us on the list.
|
||||
+ */
|
||||
+ wait = !__percpu_rwsem_trylock(sem, reader);
|
||||
+ if (wait) {
|
||||
+ wq_entry.flags |= WQ_FLAG_EXCLUSIVE | reader * WQ_FLAG_CUSTOM;
|
||||
+ __add_wait_queue_entry_tail(&sem->waiters, &wq_entry);
|
||||
+ }
|
||||
+ spin_unlock_irq(&sem->waiters.lock);
|
||||
+
|
||||
+ while (wait) {
|
||||
+ set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
+ if (!smp_load_acquire(&wq_entry.private))
|
||||
+ break;
|
||||
+ schedule();
|
||||
+ }
|
||||
+ __set_current_state(TASK_RUNNING);
|
||||
+}
|
||||
+
|
||||
bool __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
|
||||
{
|
||||
if (__percpu_down_read_trylock(sem))
|
||||
@@ -89,20 +169,10 @@ bool __percpu_down_read(struct percpu_rw
|
||||
if (try)
|
||||
return false;
|
||||
|
||||
- /*
|
||||
- * We either call schedule() in the wait, or we'll fall through
|
||||
- * and reschedule on the preempt_enable() in percpu_down_read().
|
||||
- */
|
||||
- preempt_enable_no_resched();
|
||||
-
|
||||
- /*
|
||||
- * Avoid lockdep for the down/up_read() we already have them.
|
||||
- */
|
||||
- __down_read(&sem->rw_sem);
|
||||
- this_cpu_inc(*sem->read_count);
|
||||
- __up_read(&sem->rw_sem);
|
||||
-
|
||||
+ preempt_enable();
|
||||
+ percpu_rwsem_wait(sem, /* .reader = */ true);
|
||||
preempt_disable();
|
||||
+
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__percpu_down_read);
|
||||
@@ -117,7 +187,7 @@ void __percpu_up_read(struct percpu_rw_s
|
||||
*/
|
||||
__this_cpu_dec(*sem->read_count);
|
||||
|
||||
- /* Prod writer to recheck readers_active */
|
||||
+ /* Prod writer to re-evaluate readers_active_check() */
|
||||
rcuwait_wake_up(&sem->writer);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__percpu_up_read);
|
||||
@@ -137,6 +207,8 @@ EXPORT_SYMBOL_GPL(__percpu_up_read);
|
||||
* zero. If this sum is zero, then it is stable due to the fact that if any
|
||||
* newly arriving readers increment a given counter, they will immediately
|
||||
* decrement that same counter.
|
||||
+ *
|
||||
+ * Assumes sem->block is set.
|
||||
*/
|
||||
static bool readers_active_check(struct percpu_rw_semaphore *sem)
|
||||
{
|
||||
@@ -160,23 +232,22 @@ void percpu_down_write(struct percpu_rw_
|
||||
/* Notify readers to take the slow path. */
|
||||
rcu_sync_enter(&sem->rss);
|
||||
|
||||
- __down_write(&sem->rw_sem);
|
||||
-
|
||||
/*
|
||||
- * Notify new readers to block; up until now, and thus throughout the
|
||||
- * longish rcu_sync_enter() above, new readers could still come in.
|
||||
+ * Try set sem->block; this provides writer-writer exclusion.
|
||||
+ * Having sem->block set makes new readers block.
|
||||
*/
|
||||
- WRITE_ONCE(sem->readers_block, 1);
|
||||
+ if (!__percpu_down_write_trylock(sem))
|
||||
+ percpu_rwsem_wait(sem, /* .reader = */ false);
|
||||
|
||||
- smp_mb(); /* D matches A */
|
||||
+ /* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
|
||||
|
||||
/*
|
||||
- * If they don't see our writer of readers_block, then we are
|
||||
- * guaranteed to see their sem->read_count increment, and therefore
|
||||
- * will wait for them.
|
||||
+ * If they don't see our store of sem->block, then we are guaranteed to
|
||||
+ * see their sem->read_count increment, and therefore will wait for
|
||||
+ * them.
|
||||
*/
|
||||
|
||||
- /* Wait for all now active readers to complete. */
|
||||
+ /* Wait for all active readers to complete. */
|
||||
rcuwait_wait_event(&sem->writer, readers_active_check(sem));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(percpu_down_write);
|
||||
@@ -195,12 +266,12 @@ void percpu_up_write(struct percpu_rw_se
|
||||
* Therefore we force it through the slow path which guarantees an
|
||||
* acquire and thereby guarantees the critical section's consistency.
|
||||
*/
|
||||
- smp_store_release(&sem->readers_block, 0);
|
||||
+ atomic_set_release(&sem->block, 0);
|
||||
|
||||
/*
|
||||
- * Release the write lock, this will allow readers back in the game.
|
||||
+ * Prod any pending reader/writer to make progress.
|
||||
*/
|
||||
- __up_write(&sem->rw_sem);
|
||||
+ __wake_up(&sem->waiters, TASK_NORMAL, 1, sem);
|
||||
|
||||
/*
|
||||
* Once this completes (at least one RCU-sched grace period hence) the
|
||||
--- a/kernel/locking/rwsem.c
|
||||
+++ b/kernel/locking/rwsem.c
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
-#include "rwsem.h"
|
||||
#include "lock_events.h"
|
||||
|
||||
/*
|
||||
@@ -660,8 +659,6 @@ static inline bool rwsem_can_spin_on_own
|
||||
unsigned long flags;
|
||||
bool ret = true;
|
||||
|
||||
- BUILD_BUG_ON(!(RWSEM_OWNER_UNKNOWN & RWSEM_NONSPINNABLE));
|
||||
-
|
||||
if (need_resched()) {
|
||||
lockevent_inc(rwsem_opt_fail);
|
||||
return false;
|
||||
@@ -1338,7 +1335,7 @@ static struct rw_semaphore *rwsem_downgr
|
||||
/*
|
||||
* lock for reading
|
||||
*/
|
||||
-inline void __down_read(struct rw_semaphore *sem)
|
||||
+static inline void __down_read(struct rw_semaphore *sem)
|
||||
{
|
||||
if (!rwsem_read_trylock(sem)) {
|
||||
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE);
|
||||
@@ -1383,7 +1380,7 @@ static inline int __down_read_trylock(st
|
||||
/*
|
||||
* lock for writing
|
||||
*/
|
||||
-inline void __down_write(struct rw_semaphore *sem)
|
||||
+static inline void __down_write(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp = RWSEM_UNLOCKED_VALUE;
|
||||
|
||||
@@ -1426,7 +1423,7 @@ static inline int __down_write_trylock(s
|
||||
/*
|
||||
* unlock after reading
|
||||
*/
|
||||
-inline void __up_read(struct rw_semaphore *sem)
|
||||
+static inline void __up_read(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp;
|
||||
|
||||
@@ -1446,7 +1443,7 @@ inline void __up_read(struct rw_semaphor
|
||||
/*
|
||||
* unlock after writing
|
||||
*/
|
||||
-inline void __up_write(struct rw_semaphore *sem)
|
||||
+static inline void __up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
long tmp;
|
||||
|
||||
--- a/kernel/locking/rwsem.h
|
||||
+++ b/kernel/locking/rwsem.h
|
||||
@@ -1,12 +0,0 @@
|
||||
-/* SPDX-License-Identifier: GPL-2.0 */
|
||||
-
|
||||
-#ifndef __INTERNAL_RWSEM_H
|
||||
-#define __INTERNAL_RWSEM_H
|
||||
-#include <linux/rwsem.h>
|
||||
-
|
||||
-extern void __down_read(struct rw_semaphore *sem);
|
||||
-extern void __up_read(struct rw_semaphore *sem);
|
||||
-extern void __down_write(struct rw_semaphore *sem);
|
||||
-extern void __up_write(struct rw_semaphore *sem);
|
||||
-
|
||||
-#endif /* __INTERNAL_RWSEM_H */
|
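A rough userspace analogue of the new write-side acquisition may help here; this is an assumption-laden sketch, not the kernel code. Writer-writer exclusion becomes a plain test-and-set on an atomic flag (sem->block in the patch), and a loser parks until the flag is released; the kernel parks on the FIFO waitqueue with a custom wake function, which is modelled below with a mutex and condition variable.

/* Userspace C11 analogue, illustration only. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdbool.h>

static atomic_int block;                     /* mirrors sem->block */
static pthread_mutex_t waiters_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  waiters      = PTHREAD_COND_INITIALIZER;

static bool down_write_trylock(void)
{
	/* cheap read first, then exchange: same shape as
	 * __percpu_down_write_trylock() in the patch */
	if (atomic_load(&block))
		return false;
	return atomic_exchange(&block, 1) == 0;
}

static void down_write(void)
{
	pthread_mutex_lock(&waiters_lock);
	while (!down_write_trylock())
		pthread_cond_wait(&waiters, &waiters_lock);  /* park, like percpu_rwsem_wait() */
	pthread_mutex_unlock(&waiters_lock);
}

static void up_write(void)
{
	atomic_store_explicit(&block, 0, memory_order_release);
	pthread_mutex_lock(&waiters_lock);
	pthread_cond_broadcast(&waiters);   /* kernel wakes readers until one writer */
	pthread_mutex_unlock(&waiters_lock);
}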
@@ -0,0 +1,85 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 31 Jan 2020 16:07:09 +0100
Subject: [PATCH 6/7] locking/percpu-rwsem: Fold __percpu_up_read()

Now that __percpu_up_read() is only ever used from percpu_up_read()
merge them, it's a small function.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/percpu-rwsem.h | 19 +++++++++++++++----
kernel/exit.c | 1 +
kernel/locking/percpu-rwsem.c | 15 ---------------
3 files changed, 16 insertions(+), 19 deletions(-)

--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -43,7 +43,6 @@ is_static struct percpu_rw_semaphore nam
__DEFINE_PERCPU_RWSEM(name, static)

extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool);
-extern void __percpu_up_read(struct percpu_rw_semaphore *);

static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
{
@@ -103,10 +102,22 @@ static inline void percpu_up_read(struct
/*
* Same as in percpu_down_read().
*/
- if (likely(rcu_sync_is_idle(&sem->rss)))
+ if (likely(rcu_sync_is_idle(&sem->rss))) {
__this_cpu_dec(*sem->read_count);
- else
- __percpu_up_read(sem); /* Unconditional memory barrier */
+ } else {
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ __this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+ }
preempt_enable();
}

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -258,6 +258,7 @@ void rcuwait_wake_up(struct rcuwait *w)
wake_up_process(task);
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(rcuwait_wake_up);

/*
* Determine if a process group is "orphaned", according to the POSIX
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -177,21 +177,6 @@ bool __percpu_down_read(struct percpu_rw
}
EXPORT_SYMBOL_GPL(__percpu_down_read);

-void __percpu_up_read(struct percpu_rw_semaphore *sem)
-{
- smp_mb(); /* B matches C */
- /*
- * In other words, if they see our decrement (presumably to aggregate
- * zero, as that is the only time it matters) they will also see our
- * critical section.
- */
- __this_cpu_dec(*sem->read_count);
-
- /* Prod writer to re-evaluate readers_active_check() */
- rcuwait_wake_up(&sem->writer);
-}
-EXPORT_SYMBOL_GPL(__percpu_up_read);
-
#define per_cpu_sum(var) \
({ \
typeof(var) __sum = 0; \
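The EXPORT_SYMBOL_GPL(rcuwait_wake_up) added above exists because the reader slow path is now inlined into percpu_up_read() and can end up in module code. As a minimal illustrative sketch (made-up names, not part of the patch), this is the rcuwait pattern that percpu-rwsem relies on: one task parks on an rcuwait until a condition holds, and whoever makes the condition true prods it.

/* Illustrative kernel-style sketch only. */
#include <linux/rcuwait.h>
#include <linux/atomic.h>

static struct rcuwait drain_waiter;
static atomic_t pending;

static void consumer_wait_for_drain(void)
{
	rcuwait_init(&drain_waiter);
	/* sleeps until pending drops to zero; the condition is re-checked
	 * after every wakeup, so spurious wakeups are harmless */
	rcuwait_wait_event(&drain_waiter, atomic_read(&pending) == 0);
}

static void producer_done_one(void)
{
	if (atomic_dec_and_test(&pending))
		rcuwait_wake_up(&drain_waiter);	/* reachable from modules thanks to the new export */
}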
@@ -0,0 +1,26 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 31 Jan 2020 16:07:10 +0100
Subject: [PATCH 7/7] locking/percpu-rwsem: Add might_sleep() for writer locking

We are missing this annotation in percpu_down_write(). Correct
this.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200108013305.7732-1-dave@stgolabs.net
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/locking/percpu-rwsem.c | 1 +
1 file changed, 1 insertion(+)

--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -212,6 +212,7 @@ static bool readers_active_check(struct

void percpu_down_write(struct percpu_rw_semaphore *sem)
{
+ might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);

/* Notify readers to take the slow path. */
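What the one-line annotation buys, shown on a made-up example (illustration only, not from the patch): a function whose fast path rarely blocks is still forbidden in atomic context, and might_sleep() makes CONFIG_DEBUG_ATOMIC_SLEEP complain on every such call, not only on the rare contended one.

/* Illustrative kernel-style sketch only. */
#include <linux/kernel.h>

static void my_write_lock(void)
{
	might_sleep();	/* flags atomic-context callers even when the fast path would not block */
	/* ... possibly-blocking slow path follows ... */
}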
@@ -0,0 +1,192 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 15 Nov 2019 18:54:20 +0100
Subject: [PATCH] fs/buffer: Make BH_Uptodate_Lock bit_spin_lock a regular spinlock_t

Bit spinlocks are problematic if PREEMPT_RT is enabled, because they
disable preemption, which is undesired for latency reasons and breaks when
regular spinlocks are taken within the bit_spinlock locked region because
regular spinlocks are converted to 'sleeping spinlocks' on RT. So RT
replaces the bit spinlocks with regular spinlocks to avoid this problem.
Bit spinlocks are also not covered by lock debugging, e.g. lockdep.

Substitute the BH_Uptodate_Lock bit spinlock with a regular spinlock.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: remove the wrapper and use always spinlock_t and move it into
the padding hole]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v2…v3: rename uptodate_lock to b_uptodate_lock.

v1…v2: Move the spinlock_t to the padding hole as per Jan Kara. pahole says
its total size remained unchanged, before

| atomic_t b_count; /* 96 4 */
|
| /* size: 104, cachelines: 2, members: 12 */
| /* padding: 4 */
| /* last cacheline: 40 bytes */

after

| atomic_t b_count; /* 96 4 */
| spinlock_t uptodate_lock; /* 100 4 */
|
| /* size: 104, cachelines: 2, members: 13 */
| /* last cacheline: 40 bytes */

fs/buffer.c | 19 +++++++------------
fs/ext4/page-io.c | 8 +++-----
fs/ntfs/aops.c | 9 +++------
include/linux/buffer_head.h | 6 +++---
4 files changed, 16 insertions(+), 26 deletions(-)

--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -275,8 +275,7 @@ static void end_buffer_async_read(struct
|
||||
* decide that the page is now completely done.
|
||||
*/
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ spin_lock_irqsave(&first->b_uptodate_lock, flags);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -289,8 +288,7 @@ static void end_buffer_async_read(struct
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
|
||||
/*
|
||||
* If none of the buffers had errors and they are all
|
||||
@@ -302,8 +300,7 @@ static void end_buffer_async_read(struct
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -331,8 +328,7 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ spin_lock_irqsave(&first->b_uptodate_lock, flags);
|
||||
|
||||
clear_buffer_async_write(bh);
|
||||
unlock_buffer(bh);
|
||||
@@ -344,14 +340,12 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
}
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
end_page_writeback(page);
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
return;
|
||||
}
|
||||
EXPORT_SYMBOL(end_buffer_async_write);
|
||||
@@ -3345,6 +3339,7 @@ struct buffer_head *alloc_buffer_head(gf
|
||||
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
|
||||
if (ret) {
|
||||
INIT_LIST_HEAD(&ret->b_assoc_buffers);
|
||||
+ spin_lock_init(&ret->b_uptodate_lock);
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bh_accounting.nr);
|
||||
recalc_bh_state();
|
||||
--- a/fs/ext4/page-io.c
|
||||
+++ b/fs/ext4/page-io.c
|
||||
@@ -87,11 +87,10 @@ static void ext4_finish_bio(struct bio *
|
||||
}
|
||||
bh = head = page_buffers(page);
|
||||
/*
|
||||
- * We check all buffers in the page under BH_Uptodate_Lock
|
||||
+ * We check all buffers in the page under b_uptodate_lock
|
||||
* to avoid races with other end io clearing async_write flags
|
||||
*/
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
|
||||
+ spin_lock_irqsave(&head->b_uptodate_lock, flags);
|
||||
do {
|
||||
if (bh_offset(bh) < bio_start ||
|
||||
bh_offset(bh) + bh->b_size > bio_end) {
|
||||
@@ -103,8 +102,7 @@ static void ext4_finish_bio(struct bio *
|
||||
if (bio->bi_status)
|
||||
buffer_io_error(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
|
||||
if (!under_io) {
|
||||
fscrypt_free_bounce_page(bounce_page);
|
||||
end_page_writeback(page);
|
||||
--- a/fs/ntfs/aops.c
|
||||
+++ b/fs/ntfs/aops.c
|
||||
@@ -92,8 +92,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
"0x%llx.", (unsigned long long)bh->b_blocknr);
|
||||
}
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ spin_lock_irqsave(&first->b_uptodate_lock, flags);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -108,8 +107,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
/*
|
||||
* If none of the buffers had errors then we can set the page uptodate,
|
||||
* but we first have to perform the post read mst fixups, if the
|
||||
@@ -142,8 +140,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
unlock_page(page);
|
||||
return;
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -22,9 +22,6 @@ enum bh_state_bits {
|
||||
BH_Dirty, /* Is dirty */
|
||||
BH_Lock, /* Is locked */
|
||||
BH_Req, /* Has been submitted for I/O */
|
||||
- BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise
|
||||
- * IO completion of other buffers in the page
|
||||
- */
|
||||
|
||||
BH_Mapped, /* Has a disk mapping */
|
||||
BH_New, /* Disk mapping was newly created by get_block */
|
||||
@@ -76,6 +73,9 @@ struct buffer_head {
|
||||
struct address_space *b_assoc_map; /* mapping this buffer is
|
||||
associated with */
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
+ spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to
|
||||
+ * serialise IO completion of other
|
||||
+ * buffers in the page */
|
||||
};
|
||||
|
||||
/*
|
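The conversion pattern used in fs/buffer.c, fs/ext4 and fs/ntfs above, sketched on a made-up structure (illustration only): a state-bit lock taken under local_irq_save() becomes a dedicated spinlock_t taken with spin_lock_irqsave(), which stays correct on PREEMPT_RT and is visible to lockdep.

/* Illustrative kernel-style sketch only. */
#include <linux/spinlock.h>
#include <linux/bit_spinlock.h>

struct demo {
	unsigned long state;	/* old home of the lock bit */
	spinlock_t lock;	/* new dedicated lock, spin_lock_init() at allocation time */
};

static void demo_end_io_old(struct demo *d)	/* before: bit spinlock */
{
	unsigned long flags;

	local_irq_save(flags);
	bit_spin_lock(0, &d->state);
	/* ... walk and update per-page buffer state ... */
	bit_spin_unlock(0, &d->state);
	local_irq_restore(flags);
}

static void demo_end_io_new(struct demo *d)	/* after: regular spinlock_t */
{
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	/* ... same critical section ... */
	spin_unlock_irqrestore(&d->lock, flags);
}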
@@ -0,0 +1,109 @@
From: Clark Williams <williams@redhat.com>
Date: Mon, 15 Jul 2019 15:25:00 -0500
Subject: [PATCH] thermal/x86_pkg_temp: Make pkg_temp_lock a raw_spinlock_t

The spinlock pkg_temp_lock has the potential of being taken in atomic
context because it can be acquired from the thermal IRQ vector.
It's static and limited scope so go ahead and make it a raw spinlock.

Signed-off-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/thermal/intel/x86_pkg_temp_thermal.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)

--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
|
||||
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
|
||||
@@ -63,7 +63,7 @@ static int max_id __read_mostly;
|
||||
/* Array of zone pointers */
|
||||
static struct zone_device **zones;
|
||||
/* Serializes interrupt notification, work and hotplug */
|
||||
-static DEFINE_SPINLOCK(pkg_temp_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
|
||||
/* Protects zone operation in the work function against hotplug removal */
|
||||
static DEFINE_MUTEX(thermal_zone_mutex);
|
||||
|
||||
@@ -266,12 +266,12 @@ static void pkg_temp_thermal_threshold_w
|
||||
u64 msr_val, wr_val;
|
||||
|
||||
mutex_lock(&thermal_zone_mutex);
|
||||
- spin_lock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_lock_irq(&pkg_temp_lock);
|
||||
++pkg_work_cnt;
|
||||
|
||||
zonedev = pkg_temp_thermal_get_dev(cpu);
|
||||
if (!zonedev) {
|
||||
- spin_unlock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_unlock_irq(&pkg_temp_lock);
|
||||
mutex_unlock(&thermal_zone_mutex);
|
||||
return;
|
||||
}
|
||||
@@ -285,7 +285,7 @@ static void pkg_temp_thermal_threshold_w
|
||||
}
|
||||
|
||||
enable_pkg_thres_interrupt();
|
||||
- spin_unlock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_unlock_irq(&pkg_temp_lock);
|
||||
|
||||
/*
|
||||
* If tzone is not NULL, then thermal_zone_mutex will prevent the
|
||||
@@ -310,7 +310,7 @@ static int pkg_thermal_notify(u64 msr_va
|
||||
struct zone_device *zonedev;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&pkg_temp_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&pkg_temp_lock, flags);
|
||||
++pkg_interrupt_cnt;
|
||||
|
||||
disable_pkg_thres_interrupt();
|
||||
@@ -322,7 +322,7 @@ static int pkg_thermal_notify(u64 msr_va
|
||||
pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&pkg_temp_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -368,9 +368,9 @@ static int pkg_temp_thermal_device_add(u
|
||||
zonedev->msr_pkg_therm_high);
|
||||
|
||||
cpumask_set_cpu(cpu, &zonedev->cpumask);
|
||||
- spin_lock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_lock_irq(&pkg_temp_lock);
|
||||
zones[id] = zonedev;
|
||||
- spin_unlock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_unlock_irq(&pkg_temp_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -407,7 +407,7 @@ static int pkg_thermal_cpu_offline(unsig
|
||||
}
|
||||
|
||||
/* Protect against work and interrupts */
|
||||
- spin_lock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_lock_irq(&pkg_temp_lock);
|
||||
|
||||
/*
|
||||
* Check whether this cpu was the current target and store the new
|
||||
@@ -439,9 +439,9 @@ static int pkg_thermal_cpu_offline(unsig
|
||||
* To cancel the work we need to drop the lock, otherwise
|
||||
* we might deadlock if the work needs to be flushed.
|
||||
*/
|
||||
- spin_unlock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_unlock_irq(&pkg_temp_lock);
|
||||
cancel_delayed_work_sync(&zonedev->work);
|
||||
- spin_lock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_lock_irq(&pkg_temp_lock);
|
||||
/*
|
||||
* If this is not the last cpu in the package and the work
|
||||
* did not run after we dropped the lock above, then we
|
||||
@@ -452,7 +452,7 @@ static int pkg_thermal_cpu_offline(unsig
|
||||
pkg_thermal_schedule_work(target, &zonedev->work);
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&pkg_temp_lock);
|
||||
+ raw_spin_unlock_irq(&pkg_temp_lock);
|
||||
|
||||
/* Final cleanup if this is the last cpu */
|
||||
if (lastcpu)
|
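The raw_spinlock_t pattern applied above, shown on made-up names (illustration only): a raw_spinlock_t keeps spinning, and with the _irq variants keeps interrupts disabled, even on PREEMPT_RT, so it may be taken from hard-IRQ context; the trade-off is that its critical sections must stay short.

/* Illustrative kernel-style sketch only. */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static unsigned long demo_count;

static void demo_from_irq(void)		/* safe to call from an interrupt handler, also on RT */
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);
	demo_count++;			/* keep the section short: it runs with IRQs off */
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}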
@@ -0,0 +1,30 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Nov 2019 18:04:07 +0100
Subject: [PATCH] perf/core: Add SRCU annotation for pmus list walk

Since commit
28875945ba98d ("rcu: Add support for consolidated-RCU reader checking")

there is an additional check to ensure that a RCU related lock is held
while the RCU list is iterated.
This section holds the SRCU reader lock instead.

Add annotation to list_for_each_entry_rcu() that pmus_srcu must be
acquired during the list traversal.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/events/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10264,7 +10264,7 @@ static struct pmu *perf_init_event(struc
goto unlock;
}

- list_for_each_entry_rcu(pmu, &pmus, entry) {
+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
ret = perf_try_init_event(pmu, event);
if (!ret)
goto unlock;
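The same annotation on a made-up SRCU-protected list (illustration only, using srcu_read_lock_held() as the condition rather than the patch's lockdep_is_held()): the fourth argument tells the RCU list debugging code which lock justifies the walk, so it does not warn when the iteration runs under srcu_read_lock() instead of rcu_read_lock().

/* Illustrative kernel-style sketch only. */
#include <linux/srcu.h>
#include <linux/rculist.h>

struct item {
	struct list_head entry;
	int val;
};

static LIST_HEAD(items);
DEFINE_STATIC_SRCU(items_srcu);

static int find_first_positive(void)
{
	struct item *it;
	int idx, ret = -1;

	idx = srcu_read_lock(&items_srcu);
	list_for_each_entry_rcu(it, &items, entry,
				srcu_read_lock_held(&items_srcu)) {
		if (it->val > 0) {
			ret = it->val;
			break;
		}
	}
	srcu_read_unlock(&items_srcu, idx);
	return ret;
}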
@@ -0,0 +1,411 @@
From: He Zhe <zhe.he@windriver.com>
Date: Wed, 19 Dec 2018 16:30:57 +0100
Subject: [PATCH] kmemleak: Turn kmemleak_lock and object->lock to raw_spinlock_t

kmemleak_lock as a rwlock on RT can possibly be acquired in atomic context
which does not work on RT.
Since the kmemleak operation is performed in atomic context make it a
raw_spinlock_t so it can also be acquired on RT. This is used for
debugging and is not enabled by default in a production like environment
(where performance/latency matters) so it makes sense to make it a
raw_spinlock_t instead of trying to get rid of the atomic context.
Turn also the kmemleak_object->lock into raw_spinlock_t which is
acquired (nested) while the kmemleak_lock is held.

The time spent in "echo scan > kmemleak" slightly improved on 64core box
with this patch applied after boot.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com
Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com
Link: https://lkml.kernel.org/r/20190927082230.34152-1-yongxin.liu@windriver.com
Signed-off-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Liu Haitao <haitao.liu@windriver.com>
Signed-off-by: Yongxin Liu <yongxin.liu@windriver.com>
[bigeasy: Redo the description. Merge the individual bits: He Zhe did
the kmemleak_lock, Liu Haitao the ->lock and Yongxin Liu forwarded the
patch.]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/kmemleak.c | 112 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 56 insertions(+), 56 deletions(-)

--- a/mm/kmemleak.c
|
||||
+++ b/mm/kmemleak.c
|
||||
@@ -13,7 +13,7 @@
|
||||
*
|
||||
* The following locks and mutexes are used by kmemleak:
|
||||
*
|
||||
- * - kmemleak_lock (rwlock): protects the object_list modifications and
|
||||
+ * - kmemleak_lock (raw_spinlock_t): protects the object_list modifications and
|
||||
* accesses to the object_tree_root. The object_list is the main list
|
||||
* holding the metadata (struct kmemleak_object) for the allocated memory
|
||||
* blocks. The object_tree_root is a red black tree used to look-up
|
||||
@@ -22,13 +22,13 @@
|
||||
* object_tree_root in the create_object() function called from the
|
||||
* kmemleak_alloc() callback and removed in delete_object() called from the
|
||||
* kmemleak_free() callback
|
||||
- * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to
|
||||
- * the metadata (e.g. count) are protected by this lock. Note that some
|
||||
- * members of this structure may be protected by other means (atomic or
|
||||
- * kmemleak_lock). This lock is also held when scanning the corresponding
|
||||
- * memory block to avoid the kernel freeing it via the kmemleak_free()
|
||||
- * callback. This is less heavyweight than holding a global lock like
|
||||
- * kmemleak_lock during scanning
|
||||
+ * - kmemleak_object.lock (raw_spinlock_t): protects a kmemleak_object.
|
||||
+ * Accesses to the metadata (e.g. count) are protected by this lock. Note
|
||||
+ * that some members of this structure may be protected by other means
|
||||
+ * (atomic or kmemleak_lock). This lock is also held when scanning the
|
||||
+ * corresponding memory block to avoid the kernel freeing it via the
|
||||
+ * kmemleak_free() callback. This is less heavyweight than holding a global
|
||||
+ * lock like kmemleak_lock during scanning.
|
||||
* - scan_mutex (mutex): ensures that only one thread may scan the memory for
|
||||
* unreferenced objects at a time. The gray_list contains the objects which
|
||||
* are already referenced or marked as false positives and need to be
|
||||
@@ -135,7 +135,7 @@ struct kmemleak_scan_area {
|
||||
* (use_count) and freed using the RCU mechanism.
|
||||
*/
|
||||
struct kmemleak_object {
|
||||
- spinlock_t lock;
|
||||
+ raw_spinlock_t lock;
|
||||
unsigned int flags; /* object status flags */
|
||||
struct list_head object_list;
|
||||
struct list_head gray_list;
|
||||
@@ -191,8 +191,8 @@ static int mem_pool_free_count = ARRAY_S
|
||||
static LIST_HEAD(mem_pool_free_list);
|
||||
/* search tree for object boundaries */
|
||||
static struct rb_root object_tree_root = RB_ROOT;
|
||||
-/* rw_lock protecting the access to object_list and object_tree_root */
|
||||
-static DEFINE_RWLOCK(kmemleak_lock);
|
||||
+/* protecting the access to object_list and object_tree_root */
|
||||
+static DEFINE_RAW_SPINLOCK(kmemleak_lock);
|
||||
|
||||
/* allocation caches for kmemleak internal data */
|
||||
static struct kmem_cache *object_cache;
|
||||
@@ -426,7 +426,7 @@ static struct kmemleak_object *mem_pool_
|
||||
}
|
||||
|
||||
/* slab allocation failed, try the memory pool */
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = list_first_entry_or_null(&mem_pool_free_list,
|
||||
typeof(*object), object_list);
|
||||
if (object)
|
||||
@@ -435,7 +435,7 @@ static struct kmemleak_object *mem_pool_
|
||||
object = &mem_pool[--mem_pool_free_count];
|
||||
else
|
||||
pr_warn_once("Memory pool empty, consider increasing CONFIG_DEBUG_KMEMLEAK_MEM_POOL_SIZE\n");
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
return object;
|
||||
}
|
||||
@@ -453,9 +453,9 @@ static void mem_pool_free(struct kmemlea
|
||||
}
|
||||
|
||||
/* add the object to the memory pool free list */
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
list_add(&object->object_list, &mem_pool_free_list);
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -514,9 +514,9 @@ static struct kmemleak_object *find_and_
|
||||
struct kmemleak_object *object;
|
||||
|
||||
rcu_read_lock();
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
/* check whether the object is still available */
|
||||
if (object && !get_object(object))
|
||||
@@ -546,11 +546,11 @@ static struct kmemleak_object *find_and_
|
||||
unsigned long flags;
|
||||
struct kmemleak_object *object;
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
object = lookup_object(ptr, alias);
|
||||
if (object)
|
||||
__remove_object(object);
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
|
||||
return object;
|
||||
}
|
||||
@@ -585,7 +585,7 @@ static struct kmemleak_object *create_ob
|
||||
INIT_LIST_HEAD(&object->object_list);
|
||||
INIT_LIST_HEAD(&object->gray_list);
|
||||
INIT_HLIST_HEAD(&object->area_list);
|
||||
- spin_lock_init(&object->lock);
|
||||
+ raw_spin_lock_init(&object->lock);
|
||||
atomic_set(&object->use_count, 1);
|
||||
object->flags = OBJECT_ALLOCATED;
|
||||
object->pointer = ptr;
|
||||
@@ -617,7 +617,7 @@ static struct kmemleak_object *create_ob
|
||||
/* kernel backtrace */
|
||||
object->trace_len = __save_stack_trace(object->trace);
|
||||
|
||||
- write_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
|
||||
untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr);
|
||||
min_addr = min(min_addr, untagged_ptr);
|
||||
@@ -649,7 +649,7 @@ static struct kmemleak_object *create_ob
|
||||
|
||||
list_add_tail_rcu(&object->object_list, &object_list);
|
||||
out:
|
||||
- write_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
return object;
|
||||
}
|
||||
|
||||
@@ -667,9 +667,9 @@ static void __delete_object(struct kmeml
|
||||
* Locking here also ensures that the corresponding memory block
|
||||
* cannot be freed when it is being scanned.
|
||||
*/
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
object->flags &= ~OBJECT_ALLOCATED;
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
@@ -739,9 +739,9 @@ static void paint_it(struct kmemleak_obj
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
__paint_it(object, color);
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
|
||||
static void paint_ptr(unsigned long ptr, int color)
|
||||
@@ -798,7 +798,7 @@ static void add_scan_area(unsigned long
|
||||
if (scan_area_cache)
|
||||
area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if (!area) {
|
||||
pr_warn_once("Cannot allocate a scan area, scanning the full object\n");
|
||||
/* mark the object for full scan to avoid false positives */
|
||||
@@ -820,7 +820,7 @@ static void add_scan_area(unsigned long
|
||||
|
||||
hlist_add_head(&area->node, &object->area_list);
|
||||
out_unlock:
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
@@ -842,9 +842,9 @@ static void object_set_excess_ref(unsign
|
||||
return;
|
||||
}
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
object->excess_ref = excess_ref;
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
@@ -864,9 +864,9 @@ static void object_no_scan(unsigned long
|
||||
return;
|
||||
}
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
object->flags |= OBJECT_NO_SCAN;
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
put_object(object);
|
||||
}
|
||||
|
||||
@@ -1026,9 +1026,9 @@ void __ref kmemleak_update_trace(const v
|
||||
return;
|
||||
}
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
object->trace_len = __save_stack_trace(object->trace);
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
|
||||
put_object(object);
|
||||
}
|
||||
@@ -1233,7 +1233,7 @@ static void scan_block(void *_start, voi
|
||||
unsigned long flags;
|
||||
unsigned long untagged_ptr;
|
||||
|
||||
- read_lock_irqsave(&kmemleak_lock, flags);
|
||||
+ raw_spin_lock_irqsave(&kmemleak_lock, flags);
|
||||
for (ptr = start; ptr < end; ptr++) {
|
||||
struct kmemleak_object *object;
|
||||
unsigned long pointer;
|
||||
@@ -1268,7 +1268,7 @@ static void scan_block(void *_start, voi
|
||||
* previously acquired in scan_object(). These locks are
|
||||
* enclosed by scan_mutex.
|
||||
*/
|
||||
- spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
+ raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
/* only pass surplus references (object already gray) */
|
||||
if (color_gray(object)) {
|
||||
excess_ref = object->excess_ref;
|
||||
@@ -1277,7 +1277,7 @@ static void scan_block(void *_start, voi
|
||||
excess_ref = 0;
|
||||
update_refs(object);
|
||||
}
|
||||
- spin_unlock(&object->lock);
|
||||
+ raw_spin_unlock(&object->lock);
|
||||
|
||||
if (excess_ref) {
|
||||
object = lookup_object(excess_ref, 0);
|
||||
@@ -1286,12 +1286,12 @@ static void scan_block(void *_start, voi
|
||||
if (object == scanned)
|
||||
/* circular reference, ignore */
|
||||
continue;
|
||||
- spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
+ raw_spin_lock_nested(&object->lock, SINGLE_DEPTH_NESTING);
|
||||
update_refs(object);
|
||||
- spin_unlock(&object->lock);
|
||||
+ raw_spin_unlock(&object->lock);
|
||||
}
|
||||
}
|
||||
- read_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1324,7 +1324,7 @@ static void scan_object(struct kmemleak_
|
||||
* Once the object->lock is acquired, the corresponding memory block
|
||||
* cannot be freed (the same lock is acquired in delete_object).
|
||||
*/
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if (object->flags & OBJECT_NO_SCAN)
|
||||
goto out;
|
||||
if (!(object->flags & OBJECT_ALLOCATED))
|
||||
@@ -1344,9 +1344,9 @@ static void scan_object(struct kmemleak_
|
||||
if (start >= end)
|
||||
break;
|
||||
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
cond_resched();
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
} while (object->flags & OBJECT_ALLOCATED);
|
||||
} else
|
||||
hlist_for_each_entry(area, &object->area_list, node)
|
||||
@@ -1354,7 +1354,7 @@ static void scan_object(struct kmemleak_
|
||||
(void *)(area->start + area->size),
|
||||
object);
|
||||
out:
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1407,7 +1407,7 @@ static void kmemleak_scan(void)
|
||||
/* prepare the kmemleak_object's */
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(object, &object_list, object_list) {
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
* With a few exceptions there should be a maximum of
|
||||
@@ -1424,7 +1424,7 @@ static void kmemleak_scan(void)
|
||||
if (color_gray(object) && get_object(object))
|
||||
list_add_tail(&object->gray_list, &gray_list);
|
||||
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -1492,14 +1492,14 @@ static void kmemleak_scan(void)
|
||||
*/
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(object, &object_list, object_list) {
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if (color_white(object) && (object->flags & OBJECT_ALLOCATED)
|
||||
&& update_checksum(object) && get_object(object)) {
|
||||
/* color it gray temporarily */
|
||||
object->count = object->min_count;
|
||||
list_add_tail(&object->gray_list, &gray_list);
|
||||
}
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -1519,7 +1519,7 @@ static void kmemleak_scan(void)
|
||||
*/
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(object, &object_list, object_list) {
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if (unreferenced_object(object) &&
|
||||
!(object->flags & OBJECT_REPORTED)) {
|
||||
object->flags |= OBJECT_REPORTED;
|
||||
@@ -1529,7 +1529,7 @@ static void kmemleak_scan(void)
|
||||
|
||||
new_leaks++;
|
||||
}
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -1681,10 +1681,10 @@ static int kmemleak_seq_show(struct seq_
|
||||
struct kmemleak_object *object = v;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if ((object->flags & OBJECT_REPORTED) && unreferenced_object(object))
|
||||
print_unreferenced(seq, object);
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1714,9 +1714,9 @@ static int dump_str_object_info(const ch
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
dump_object_info(object);
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
|
||||
put_object(object);
|
||||
return 0;
|
||||
@@ -1735,11 +1735,11 @@ static void kmemleak_clear(void)
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(object, &object_list, object_list) {
|
||||
- spin_lock_irqsave(&object->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&object->lock, flags);
|
||||
if ((object->flags & OBJECT_REPORTED) &&
|
||||
unreferenced_object(object))
|
||||
__paint_it(object, KMEMLEAK_GREY);
|
||||
- spin_unlock_irqrestore(&object->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&object->lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
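The two conversions the kmemleak patch performs, condensed on made-up types (illustration only): a global rwlock_t becomes a raw_spinlock_t, since rwlocks are sleeping locks on PREEMPT_RT and must not be taken in atomic context there, and the per-object spinlock_t that nests under it becomes raw as well, because a sleeping lock must not nest inside a raw one.

/* Illustrative kernel-style sketch only. */
#include <linux/spinlock.h>
#include <linux/lockdep.h>

static DEFINE_RAW_SPINLOCK(registry_lock);	/* was: DEFINE_RWLOCK(registry_lock) */

struct tracked {
	raw_spinlock_t lock;			/* was: spinlock_t */
	unsigned int count;
};

static void tracked_init(struct tracked *t)
{
	raw_spin_lock_init(&t->lock);		/* was: spin_lock_init() */
}

static void tracked_bump(struct tracked *t)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&registry_lock, flags);	/* was: read_lock_irqsave() */
	raw_spin_lock_nested(&t->lock, SINGLE_DEPTH_NESTING);
	t->count++;
	raw_spin_unlock(&t->lock);
	raw_spin_unlock_irqrestore(&registry_lock, flags);
}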
@@ -0,0 +1,99 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 16 Jan 2020 12:00:31 +0100
Subject: [PATCH] smp: Use smp_cond_func_t as type for the conditional function

Use a typedef for the conditional function instead of defining it each time in
the function prototype.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/smp.h | 14 +++++++-------
kernel/smp.c | 11 +++++------
kernel/up.c | 11 +++++------
3 files changed, 17 insertions(+), 19 deletions(-)

--- a/include/linux/smp.h
|
||||
+++ b/include/linux/smp.h
|
||||
@@ -15,6 +15,7 @@
|
||||
#include <linux/llist.h>
|
||||
|
||||
typedef void (*smp_call_func_t)(void *info);
|
||||
+typedef bool (*smp_cond_func_t)(int cpu, void *info);
|
||||
struct __call_single_data {
|
||||
struct llist_node llist;
|
||||
smp_call_func_t func;
|
||||
@@ -49,13 +50,12 @@ void on_each_cpu_mask(const struct cpuma
|
||||
* cond_func returns a positive value. This may include the local
|
||||
* processor.
|
||||
*/
|
||||
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags);
|
||||
-
|
||||
-void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags, const struct cpumask *mask);
|
||||
+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags);
|
||||
+
|
||||
+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags,
|
||||
+ const struct cpumask *mask);
|
||||
|
||||
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
|
||||
|
||||
--- a/kernel/smp.c
|
||||
+++ b/kernel/smp.c
|
||||
@@ -680,9 +680,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||
* You must not call this function with disabled interrupts or
|
||||
* from a hardware interrupt handler or from a bottom half handler.
|
||||
*/
|
||||
-void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags, const struct cpumask *mask)
|
||||
+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags,
|
||||
+ const struct cpumask *mask)
|
||||
{
|
||||
cpumask_var_t cpus;
|
||||
int cpu, ret;
|
||||
@@ -714,9 +714,8 @@ void on_each_cpu_cond_mask(bool (*cond_f
|
||||
}
|
||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||
|
||||
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags)
|
||||
+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags)
|
||||
{
|
||||
on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags,
|
||||
cpu_online_mask);
|
||||
--- a/kernel/up.c
|
||||
+++ b/kernel/up.c
|
||||
@@ -68,9 +68,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||
* Preemption is disabled here to make sure the cond_func is called under the
|
||||
* same condtions in UP and SMP.
|
||||
*/
|
||||
-void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags, const struct cpumask *mask)
|
||||
+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags,
|
||||
+ const struct cpumask *mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@@ -84,9 +84,8 @@ void on_each_cpu_cond_mask(bool (*cond_f
|
||||
}
|
||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||
|
||||
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
|
||||
- smp_call_func_t func, void *info, bool wait,
|
||||
- gfp_t gfp_flags)
|
||||
+void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
+ void *info, bool wait, gfp_t gfp_flags)
|
||||
{
|
||||
on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL);
|
||||
}
|
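A small sketch of what the typedef enables (made-up names, illustration only): any callback with the matching shape can now be stored or passed around as an smp_cond_func_t instead of repeating the full function-pointer type in every prototype.

/* Illustrative kernel-style sketch only. */
#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(bool, pending_work);

/* matches: typedef bool (*smp_cond_func_t)(int cpu, void *info); */
static bool cpu_has_pending_work(int cpu, void *info)
{
	return per_cpu(pending_work, cpu);
}

static smp_cond_func_t demo_cond = cpu_has_pending_work;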
@@ -0,0 +1,139 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 16 Jan 2020 12:14:38 +0100
Subject: [PATCH] smp: Add a smp_cond_func_t argument to smp_call_function_many()

on_each_cpu_cond_mask() allocates a new CPU mask. The newly allocated
mask is a subset of the provided mask based on the conditional function.
This memory allocation could be avoided by extending
smp_call_function_many() with the conditional function and performing the
remote function call based on the mask and the conditional function.

Rename smp_call_function_many() to smp_call_function_many_cond() and add
the smp_cond_func_t argument. If smp_cond_func_t is provided then it is
used before invoking the function.
Provide smp_call_function_many() with cond_func set to NULL.
Let on_each_cpu_cond_mask() use smp_call_function_many_cond().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/smp.c | 77 +++++++++++++++++++++++++++--------------------------------
1 file changed, 36 insertions(+), 41 deletions(-)

--- a/kernel/smp.c
|
||||
+++ b/kernel/smp.c
|
||||
@@ -395,22 +395,9 @@ int smp_call_function_any(const struct c
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(smp_call_function_any);
|
||||
|
||||
-/**
|
||||
- * smp_call_function_many(): Run a function on a set of other CPUs.
|
||||
- * @mask: The set of cpus to run on (only runs on online subset).
|
||||
- * @func: The function to run. This must be fast and non-blocking.
|
||||
- * @info: An arbitrary pointer to pass to the function.
|
||||
- * @wait: If true, wait (atomically) until function has completed
|
||||
- * on other CPUs.
|
||||
- *
|
||||
- * If @wait is true, then returns once @func has returned.
|
||||
- *
|
||||
- * You must not call this function with disabled interrupts or from a
|
||||
- * hardware interrupt handler or from a bottom half handler. Preemption
|
||||
- * must be disabled when calling this function.
|
||||
- */
|
||||
-void smp_call_function_many(const struct cpumask *mask,
|
||||
- smp_call_func_t func, void *info, bool wait)
|
||||
+static void smp_call_function_many_cond(const struct cpumask *mask,
|
||||
+ smp_call_func_t func, void *info,
|
||||
+ bool wait, smp_cond_func_t cond_func)
|
||||
{
|
||||
struct call_function_data *cfd;
|
||||
int cpu, next_cpu, this_cpu = smp_processor_id();
|
||||
@@ -448,7 +435,8 @@ void smp_call_function_many(const struct
|
||||
|
||||
/* Fastpath: do that cpu by itself. */
|
||||
if (next_cpu >= nr_cpu_ids) {
|
||||
- smp_call_function_single(cpu, func, info, wait);
|
||||
+ if (!cond_func || cond_func(cpu, info))
|
||||
+ smp_call_function_single(cpu, func, info, wait);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -465,6 +453,9 @@ void smp_call_function_many(const struct
|
||||
for_each_cpu(cpu, cfd->cpumask) {
|
||||
call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);
|
||||
|
||||
+ if (cond_func && !cond_func(cpu, info))
|
||||
+ continue;
|
||||
+
|
||||
csd_lock(csd);
|
||||
if (wait)
|
||||
csd->flags |= CSD_FLAG_SYNCHRONOUS;
|
||||
@@ -486,6 +477,26 @@ void smp_call_function_many(const struct
|
||||
}
|
||||
}
|
||||
}
|
||||
+
|
||||
+/**
|
||||
+ * smp_call_function_many(): Run a function on a set of other CPUs.
|
||||
+ * @mask: The set of cpus to run on (only runs on online subset).
|
||||
+ * @func: The function to run. This must be fast and non-blocking.
|
||||
+ * @info: An arbitrary pointer to pass to the function.
|
||||
+ * @wait: If true, wait (atomically) until function has completed
|
||||
+ * on other CPUs.
|
||||
+ *
|
||||
+ * If @wait is true, then returns once @func has returned.
|
||||
+ *
|
||||
+ * You must not call this function with disabled interrupts or from a
|
||||
+ * hardware interrupt handler or from a bottom half handler. Preemption
|
||||
+ * must be disabled when calling this function.
|
||||
+ */
|
||||
+void smp_call_function_many(const struct cpumask *mask,
|
||||
+ smp_call_func_t func, void *info, bool wait)
|
||||
+{
|
||||
+ smp_call_function_many_cond(mask, func, info, wait, NULL);
|
||||
+}
|
||||
EXPORT_SYMBOL(smp_call_function_many);
|
||||
|
||||
/**
|
||||
@@ -684,33 +695,17 @@ void on_each_cpu_cond_mask(smp_cond_func
|
||||
void *info, bool wait, gfp_t gfp_flags,
|
||||
const struct cpumask *mask)
|
||||
{
|
||||
- cpumask_var_t cpus;
|
||||
- int cpu, ret;
|
||||
+ int cpu = get_cpu();
|
||||
|
||||
- might_sleep_if(gfpflags_allow_blocking(gfp_flags));
|
||||
+ smp_call_function_many_cond(mask, func, info, wait, cond_func);
|
||||
+ if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
|
||||
+ unsigned long flags;
|
||||
|
||||
- if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
|
||||
- preempt_disable();
|
||||
- for_each_cpu(cpu, mask)
|
||||
- if (cond_func(cpu, info))
|
||||
- __cpumask_set_cpu(cpu, cpus);
|
||||
- on_each_cpu_mask(cpus, func, info, wait);
|
||||
- preempt_enable();
|
||||
- free_cpumask_var(cpus);
|
||||
- } else {
|
||||
- /*
|
||||
- * No free cpumask, bother. No matter, we'll
|
||||
- * just have to IPI them one by one.
|
||||
- */
|
||||
- preempt_disable();
|
||||
- for_each_cpu(cpu, mask)
|
||||
- if (cond_func(cpu, info)) {
|
||||
- ret = smp_call_function_single(cpu, func,
|
||||
- info, wait);
|
||||
- WARN_ON_ONCE(ret);
|
||||
- }
|
||||
- preempt_enable();
|
||||
+ local_irq_save(flags);
|
||||
+ func(info);
|
||||
+ local_irq_restore(flags);
|
||||
}
|
||||
+ put_cpu();
|
||||
}
|
||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||
|
@ -0,0 +1,127 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 16 Jan 2020 13:13:41 +0100
|
||||
Subject: [PATCH] smp: Remove allocation mask from on_each_cpu_cond.*()
|
||||
|
||||
The allocation mask is no longer used by on_each_cpu_cond() and
|
||||
on_each_cpu_cond_mask() and can be removed.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
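As a usage sketch of the trimmed interface (hypothetical caller and helpers, mirroring the converted call sites such as flush_all() in mm/slub.c):

#include <linux/smp.h>

static bool my_cpu_has_work(int cpu, void *info)
{
        /* decide per CPU whether an IPI is worth sending */
        return true;
}

static void my_remote_fn(void *info)
{
        /* runs on every CPU selected by my_cpu_has_work() */
}

static void kick_remote_cpus(void *cookie)
{
        /* no GFP flags any more: nothing is allocated internally */
        on_each_cpu_cond(my_cpu_has_work, my_remote_fn, cookie, true);
}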
---
|
||||
arch/x86/mm/tlb.c | 2 +-
|
||||
fs/buffer.c | 2 +-
|
||||
include/linux/smp.h | 5 ++---
|
||||
kernel/smp.c | 13 +++----------
|
||||
kernel/up.c | 7 +++----
|
||||
mm/slub.c | 2 +-
|
||||
6 files changed, 11 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/arch/x86/mm/tlb.c
|
||||
+++ b/arch/x86/mm/tlb.c
|
||||
@@ -708,7 +708,7 @@ void native_flush_tlb_others(const struc
|
||||
(void *)info, 1);
|
||||
else
|
||||
on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
|
||||
- (void *)info, 1, GFP_ATOMIC, cpumask);
|
||||
+ (void *)info, 1, cpumask);
|
||||
}
|
||||
|
||||
/*
|
||||
--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -1387,7 +1387,7 @@ static bool has_bh_in_lru(int cpu, void
|
||||
|
||||
void invalidate_bh_lrus(void)
|
||||
{
|
||||
- on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL);
|
||||
+ on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
|
||||
|
||||
--- a/include/linux/smp.h
|
||||
+++ b/include/linux/smp.h
|
||||
@@ -51,11 +51,10 @@ void on_each_cpu_mask(const struct cpuma
|
||||
* processor.
|
||||
*/
|
||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags);
|
||||
+ void *info, bool wait);
|
||||
|
||||
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags,
|
||||
- const struct cpumask *mask);
|
||||
+ void *info, bool wait, const struct cpumask *mask);
|
||||
|
||||
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
|
||||
|
||||
--- a/kernel/smp.c
|
||||
+++ b/kernel/smp.c
|
||||
@@ -679,11 +679,6 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||
* @info: An arbitrary pointer to pass to both functions.
|
||||
* @wait: If true, wait (atomically) until function has
|
||||
* completed on other CPUs.
|
||||
- * @gfp_flags: GFP flags to use when allocating the cpumask
|
||||
- * used internally by the function.
|
||||
- *
|
||||
- * The function might sleep if the GFP flags indicates a non
|
||||
- * atomic allocation is allowed.
|
||||
*
|
||||
* Preemption is disabled to protect against CPUs going offline but not online.
|
||||
* CPUs going online during the call will not be seen or sent an IPI.
|
||||
@@ -692,8 +687,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||
* from a hardware interrupt handler or from a bottom half handler.
|
||||
*/
|
||||
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags,
|
||||
- const struct cpumask *mask)
|
||||
+ void *info, bool wait, const struct cpumask *mask)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
|
||||
@@ -710,10 +704,9 @@ void on_each_cpu_cond_mask(smp_cond_func
|
||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||
|
||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags)
|
||||
+ void *info, bool wait)
|
||||
{
|
||||
- on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags,
|
||||
- cpu_online_mask);
|
||||
+ on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(on_each_cpu_cond);
|
||||
|
||||
--- a/kernel/up.c
|
||||
+++ b/kernel/up.c
|
||||
@@ -69,8 +69,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
|
||||
* same condtions in UP and SMP.
|
||||
*/
|
||||
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags,
|
||||
- const struct cpumask *mask)
|
||||
+ void *info, bool wait, const struct cpumask *mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@@ -85,9 +84,9 @@ void on_each_cpu_cond_mask(smp_cond_func
|
||||
EXPORT_SYMBOL(on_each_cpu_cond_mask);
|
||||
|
||||
void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
|
||||
- void *info, bool wait, gfp_t gfp_flags)
|
||||
+ void *info, bool wait)
|
||||
{
|
||||
- on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL);
|
||||
+ on_each_cpu_cond_mask(cond_func, func, info, wait, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(on_each_cpu_cond);
|
||||
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -2338,7 +2338,7 @@ static bool has_cpu_slab(int cpu, void *
|
||||
|
||||
static void flush_all(struct kmem_cache *s)
|
||||
{
|
||||
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
|
||||
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
|
||||
}
|
||||
|
||||
/*
|
@ -0,0 +1,35 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 21 Feb 2020 18:57:11 +0100
|
||||
Subject: [PATCH] drm/vmwgfx: Drop preempt_disable() in
|
||||
vmw_fifo_ping_host()
|
||||
|
||||
vmw_fifo_ping_host() disables preemption around a test and a register
|
||||
write via vmw_write(). The write function acquires a spinlock_t typed
|
||||
lock which is not allowed in a preempt_disable()ed section on
|
||||
PREEMPT_RT. This has been reported in the bugzilla.
|
||||
|
||||
It has been explained by Thomas Hellstrom that this preempt_disable()ed
|
||||
section is not required for correctness.
|
||||
|
||||
Remove the preempt_disable() section.
|
||||
|
||||
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206591
|
||||
Link: https://lkml.kernel.org/r/0b5e1c65d89951de993deab06d1d197b40fd67aa.camel@vmware.com
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
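The idiom that remains is sketched below with assumed names (busy_flag stands in for the SVGA_FIFO_BUSY word, issue_sync_write() for vmw_write()); cmpxchg() is atomic on its own, so only the caller that wins the 0 -> 1 transition issues the register write, preempted or not.

#include <linux/atomic.h>

static u32 busy_flag;                   /* stand-in for fifo_mem[SVGA_FIFO_BUSY] */

static void issue_sync_write(void)
{
        /* stand-in for vmw_write(dev_priv, SVGA_REG_SYNC, reason) */
}

static void ping_host_once(void)
{
        /* only the winner of the 0 -> 1 transition pings the host */
        if (cmpxchg(&busy_flag, 0, 1) == 0)
                issue_sync_write();
}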
---
|
||||
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 2 --
|
||||
1 file changed, 2 deletions(-)
|
||||
|
||||
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
|
||||
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
|
||||
@@ -169,10 +169,8 @@ void vmw_fifo_ping_host(struct vmw_priva
|
||||
{
|
||||
u32 *fifo_mem = dev_priv->mmio_virt;
|
||||
|
||||
- preempt_disable();
|
||||
if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
|
||||
vmw_write(dev_priv, SVGA_REG_SYNC, reason);
|
||||
- preempt_enable();
|
||||
}
|
||||
|
||||
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
|
@ -0,0 +1,32 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 3 Mar 2020 13:43:25 +0100
|
||||
Subject: [PATCH] =?UTF-8?q?mm/compaction:=20Really=20limit=20compact=5Fune?=
|
||||
=?UTF-8?q?victable=5Fallowed=20to=200=E2=80=A61?=
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
The proc file `compact_unevictable_allowed' should allow 0 and 1 only,
|
||||
the `extra*' attributes have been set properly, but without
|
||||
proc_dointvec_minmax() as the `proc_handler' the limit will not be
|
||||
enforced.
|
||||
|
||||
Use proc_dointvec_minmax() as the `proc_handler' to enforce the valid
|
||||
specified range.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
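A minimal sketch of the mechanism, using a hypothetical sysctl rather than the real vm_table entry: the extra1/extra2 bounds are only honoured when proc_dointvec_minmax() is the handler; plain proc_dointvec() ignores them and accepts any integer.

#include <linux/sysctl.h>

static int my_flag;                             /* hypothetical 0/1 tunable */

static struct ctl_table my_vm_table[] = {
        {
                .procname       = "my_flag",
                .data           = &my_flag,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax, /* clamps to extra1..extra2 */
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
        { }
};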
---
|
||||
kernel/sysctl.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -1493,7 +1493,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &sysctl_compact_unevictable_allowed,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
- .proc_handler = proc_dointvec,
|
||||
+ .proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
@ -0,0 +1,102 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 8 Nov 2019 12:55:47 +0100
|
||||
Subject: [PATCH] mm/compaction: Disable compact_unevictable_allowed on RT
|
||||
|
||||
Since commit
|
||||
5bbe3547aa3ba ("mm: allow compaction of unevictable pages")
|
||||
|
||||
it is allowed to examine mlocked pages and compact them by default.
|
||||
On -RT even minor pagefaults are problematic because it may take a few
|
||||
100us to resolve them and until then the task is blocked.
|
||||
|
||||
Make compact_unevictable_allowed = 0 default and issue a warning on RT
|
||||
if it is changed.
|
||||
|
||||
Link: https://lore.kernel.org/linux-mm/20190710144138.qyn4tuttdq6h7kqx@linutronix.de/
|
||||
Acked-by: Mel Gorman <mgorman@techsingularity.net>
|
||||
Acked-by: Vlastimil Babka <vbabka@suse.cz>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
Documentation/admin-guide/sysctl/vm.rst | 3 +++
|
||||
kernel/sysctl.c | 29 ++++++++++++++++++++++++++++-
|
||||
mm/compaction.c | 4 ++++
|
||||
3 files changed, 35 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/Documentation/admin-guide/sysctl/vm.rst
|
||||
+++ b/Documentation/admin-guide/sysctl/vm.rst
|
||||
@@ -128,6 +128,9 @@ allowed to examine the unevictable lru (
|
||||
This should be used on systems where stalls for minor page faults are an
|
||||
acceptable trade for large contiguous free memory. Set to 0 to prevent
|
||||
compaction from moving pages that are unevictable. Default value is 1.
|
||||
+On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
|
||||
+to compaction, which would block the task from becomming active until the fault
|
||||
+is resolved.
|
||||
|
||||
|
||||
dirty_background_bytes
|
||||
--- a/kernel/sysctl.c
|
||||
+++ b/kernel/sysctl.c
|
||||
@@ -212,6 +212,11 @@ static int proc_do_cad_pid(struct ctl_ta
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
static int proc_taint(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
+#ifdef CONFIG_COMPACTION
|
||||
+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
|
||||
+ int write, void __user *buffer,
|
||||
+ size_t *lenp, loff_t *ppos);
|
||||
+#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
@@ -1493,7 +1498,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &sysctl_compact_unevictable_allowed,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
- .proc_handler = proc_dointvec_minmax,
|
||||
+ .proc_handler = proc_dointvec_minmax_warn_RT_change,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
@@ -2581,6 +2586,28 @@ int proc_dointvec(struct ctl_table *tabl
|
||||
return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_COMPACTION
|
||||
+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
|
||||
+ int write, void __user *buffer,
|
||||
+ size_t *lenp, loff_t *ppos)
|
||||
+{
|
||||
+ int ret, old;
|
||||
+
|
||||
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
|
||||
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
+
|
||||
+ old = *(int *)table->data;
|
||||
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ if (old != *(int *)table->data)
|
||||
+ pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
|
||||
+ table->procname, current->comm,
|
||||
+ task_pid_nr(current));
|
||||
+ return ret;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/**
|
||||
* proc_douintvec - read a vector of unsigned integers
|
||||
* @table: the sysctl table
|
||||
--- a/mm/compaction.c
|
||||
+++ b/mm/compaction.c
|
||||
@@ -1590,7 +1590,11 @@ typedef enum {
|
||||
* Allow userspace to control policy on scanning the unevictable LRU for
|
||||
* compactable pages.
|
||||
*/
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+int sysctl_compact_unevictable_allowed __read_mostly = 0;
|
||||
+#else
|
||||
int sysctl_compact_unevictable_allowed __read_mostly = 1;
|
||||
+#endif
|
||||
|
||||
static inline void
|
||||
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
|
1521
kernel/patches-5.4.x-rt/0073-Use-CONFIG_PREEMPTION.patch
Normal file
File diff suppressed because it is too large
@ -0,0 +1,35 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 11 Jun 2019 11:21:02 +0200
|
||||
Subject: [PATCH 1/4] workqueue: Don't assume that the callback has interrupts
|
||||
disabled
|
||||
|
||||
Due to the TIMER_IRQSAFE flag, the timer callback is invoked with
|
||||
disabled interrupts. On -RT the callback is invoked in softirq context
|
||||
with enabled interrupts. Since the interrupts are threaded, there are
|
||||
no in_irq() users. The local_bh_disable() around the threaded
|
||||
handler ensures that there is either a timer or a threaded handler
|
||||
active on the CPU.
|
||||
|
||||
Disable interrupts before __queue_work() is invoked from the timer
|
||||
callback.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
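The same pattern applies to any TIMER_IRQSAFE callback; a sketch with a hypothetical callback, not additional kernel code: since on -RT the callback may run in softirq context with interrupts enabled, code that relies on interrupts being off has to disable them explicitly.

#include <linux/interrupt.h>
#include <linux/timer.h>

static void my_timer_fn(struct timer_list *t)   /* hypothetical callback */
{
        unsigned long flags;

        local_irq_save(flags);
        /* work that used to rely on the hardirq context of the timer */
        local_irq_restore(flags);
}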
---
|
||||
kernel/workqueue.c | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/workqueue.c
|
||||
+++ b/kernel/workqueue.c
|
||||
@@ -1614,9 +1614,11 @@ EXPORT_SYMBOL_GPL(queue_work_node);
|
||||
void delayed_work_timer_fn(struct timer_list *t)
|
||||
{
|
||||
struct delayed_work *dwork = from_timer(dwork, t, timer);
|
||||
+ unsigned long flags;
|
||||
|
||||
- /* should have been called from irqsafe timer with irq already off */
|
||||
+ local_irq_save(flags);
|
||||
__queue_work(dwork->cpu, dwork->wq, &dwork->work);
|
||||
+ local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL(delayed_work_timer_fn);
|
||||
|
@ -0,0 +1,33 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 22 May 2019 12:42:26 +0200
|
||||
Subject: [PATCH 2/4] sched/swait: Add swait_event_lock_irq()
|
||||
|
||||
The swait_event_lock_irq() is inspired by wait_event_lock_irq(). This is
|
||||
required by the workqueue code once it switches to swait.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
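A usage sketch under assumed names, not part of the patch: the caller enters with the raw spinlock held, the macro drops it (re-enabling interrupts) around schedule() and re-takes it before re-checking the condition, and the waker uses swake_up_one() under the same lock.

#include <linux/spinlock.h>
#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(my_wait);
static DEFINE_RAW_SPINLOCK(my_lock);
static bool my_done;

static void wait_until_done(void)
{
        raw_spin_lock_irq(&my_lock);
        swait_event_lock_irq(my_wait, my_done, my_lock);
        /* my_lock is held again here and my_done is true */
        raw_spin_unlock_irq(&my_lock);
}

static void mark_done(void)
{
        raw_spin_lock_irq(&my_lock);
        my_done = true;
        swake_up_one(&my_wait);
        raw_spin_unlock_irq(&my_lock);
}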
---
|
||||
include/linux/swait.h | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
--- a/include/linux/swait.h
|
||||
+++ b/include/linux/swait.h
|
||||
@@ -297,4 +297,18 @@ do { \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
+#define __swait_event_lock_irq(wq, condition, lock, cmd) \
|
||||
+ ___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \
|
||||
+ raw_spin_unlock_irq(&lock); \
|
||||
+ cmd; \
|
||||
+ schedule(); \
|
||||
+ raw_spin_lock_irq(&lock))
|
||||
+
|
||||
+#define swait_event_lock_irq(wq_head, condition, lock) \
|
||||
+ do { \
|
||||
+ if (condition) \
|
||||
+ break; \
|
||||
+ __swait_event_lock_irq(wq_head, condition, lock, ); \
|
||||
+ } while (0)
|
||||
+
|
||||
#endif /* _LINUX_SWAIT_H */
|
@ -0,0 +1,53 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Tue, 11 Jun 2019 11:21:09 +0200
|
||||
Subject: [PATCH 3/4] workqueue: Use swait for wq_manager_wait
|
||||
|
||||
In order for the workqueue code to use raw_spinlock_t typed locking, no
|
||||
spinlock_t typed lock may be acquired. A wait_queue_head uses
|
||||
a spinlock_t lock for its list protection.
|
||||
|
||||
Use a swait based queue head to avoid raw_spinlock_t -> spinlock_t
|
||||
locking.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/workqueue.c | 7 ++++---
|
||||
1 file changed, 4 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/kernel/workqueue.c
|
||||
+++ b/kernel/workqueue.c
|
||||
@@ -50,6 +50,7 @@
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched/isolation.h>
|
||||
#include <linux/nmi.h>
|
||||
+#include <linux/swait.h>
|
||||
|
||||
#include "workqueue_internal.h"
|
||||
|
||||
@@ -301,7 +302,7 @@ static struct workqueue_attrs *wq_update
|
||||
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
|
||||
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
|
||||
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
|
||||
-static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
|
||||
+static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
|
||||
|
||||
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
|
||||
static bool workqueue_freezing; /* PL: have wqs started freezing? */
|
||||
@@ -2146,7 +2147,7 @@ static bool manage_workers(struct worker
|
||||
|
||||
pool->manager = NULL;
|
||||
pool->flags &= ~POOL_MANAGER_ACTIVE;
|
||||
- wake_up(&wq_manager_wait);
|
||||
+ swake_up_one(&wq_manager_wait);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -3547,7 +3548,7 @@ static void put_unbound_pool(struct work
|
||||
* manager and @pool gets freed with the flag set.
|
||||
*/
|
||||
spin_lock_irq(&pool->lock);
|
||||
- wait_event_lock_irq(wq_manager_wait,
|
||||
+ swait_event_lock_irq(wq_manager_wait,
|
||||
!(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
|
||||
pool->flags |= POOL_MANAGER_ACTIVE;
|
||||
|
@ -0,0 +1,680 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 22 May 2019 12:43:56 +0200
|
||||
Subject: [PATCH 4/4] workqueue: Convert the locks to raw type
|
||||
|
||||
After all the workqueue and the timer rework, we can finally make the
|
||||
worker_pool lock raw.
|
||||
The lock is not held over an unbounded period of time/iterations.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
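The rule behind the conversion, as a brief sketch with an illustrative lock (not taken from the diff): on PREEMPT_RT a spinlock_t becomes a sleeping lock, so a lock that is taken from truly atomic context must be a raw_spinlock_t, and its critical sections have to stay short and bounded.

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);       /* illustrative lock */

static void example_critical_section(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&example_lock, flags);
        /* short, bounded work; never sleeps, even on PREEMPT_RT */
        raw_spin_unlock_irqrestore(&example_lock, flags);
}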
---
|
||||
kernel/workqueue.c | 168 ++++++++++++++++++++++++++---------------------------
|
||||
1 file changed, 84 insertions(+), 84 deletions(-)
|
||||
|
||||
--- a/kernel/workqueue.c
|
||||
+++ b/kernel/workqueue.c
|
||||
@@ -146,7 +146,7 @@ enum {
|
||||
/* struct worker is defined in workqueue_internal.h */
|
||||
|
||||
struct worker_pool {
|
||||
- spinlock_t lock; /* the pool lock */
|
||||
+ raw_spinlock_t lock; /* the pool lock */
|
||||
int cpu; /* I: the associated cpu */
|
||||
int node; /* I: the associated node ID */
|
||||
int id; /* I: pool ID */
|
||||
@@ -301,7 +301,7 @@ static struct workqueue_attrs *wq_update
|
||||
|
||||
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
|
||||
static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
|
||||
-static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
|
||||
+static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
|
||||
static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
|
||||
|
||||
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
|
||||
@@ -833,7 +833,7 @@ static struct worker *first_idle_worker(
|
||||
* Wake up the first idle worker of @pool.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void wake_up_worker(struct worker_pool *pool)
|
||||
{
|
||||
@@ -886,7 +886,7 @@ void wq_worker_sleeping(struct task_stru
|
||||
return;
|
||||
|
||||
worker->sleeping = 1;
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
/*
|
||||
* The counterpart of the following dec_and_test, implied mb,
|
||||
@@ -905,7 +905,7 @@ void wq_worker_sleeping(struct task_stru
|
||||
if (next)
|
||||
wake_up_process(next->task);
|
||||
}
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -916,7 +916,7 @@ void wq_worker_sleeping(struct task_stru
|
||||
* the scheduler to get a worker's last known identity.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(rq->lock)
|
||||
+ * raw_spin_lock_irq(rq->lock)
|
||||
*
|
||||
* This function is called during schedule() when a kworker is going
|
||||
* to sleep. It's used by psi to identify aggregation workers during
|
||||
@@ -947,7 +947,7 @@ work_func_t wq_worker_last_func(struct t
|
||||
* Set @flags in @worker->flags and adjust nr_running accordingly.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock)
|
||||
+ * raw_spin_lock_irq(pool->lock)
|
||||
*/
|
||||
static inline void worker_set_flags(struct worker *worker, unsigned int flags)
|
||||
{
|
||||
@@ -972,7 +972,7 @@ static inline void worker_set_flags(stru
|
||||
* Clear @flags in @worker->flags and adjust nr_running accordingly.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock)
|
||||
+ * raw_spin_lock_irq(pool->lock)
|
||||
*/
|
||||
static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
|
||||
{
|
||||
@@ -1020,7 +1020,7 @@ static inline void worker_clr_flags(stru
|
||||
* actually occurs, it should be easy to locate the culprit work function.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*
|
||||
* Return:
|
||||
* Pointer to worker which is executing @work if found, %NULL
|
||||
@@ -1055,7 +1055,7 @@ static struct worker *find_worker_execut
|
||||
* nested inside outer list_for_each_entry_safe().
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void move_linked_works(struct work_struct *work, struct list_head *head,
|
||||
struct work_struct **nextp)
|
||||
@@ -1133,9 +1133,9 @@ static void put_pwq_unlocked(struct pool
|
||||
* As both pwqs and pools are RCU protected, the
|
||||
* following lock operations are safe.
|
||||
*/
|
||||
- spin_lock_irq(&pwq->pool->lock);
|
||||
+ raw_spin_lock_irq(&pwq->pool->lock);
|
||||
put_pwq(pwq);
|
||||
- spin_unlock_irq(&pwq->pool->lock);
|
||||
+ raw_spin_unlock_irq(&pwq->pool->lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1168,7 +1168,7 @@ static void pwq_activate_first_delayed(s
|
||||
* decrement nr_in_flight of its pwq and handle workqueue flushing.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
|
||||
{
|
||||
@@ -1267,7 +1267,7 @@ static int try_to_grab_pending(struct wo
|
||||
if (!pool)
|
||||
goto fail;
|
||||
|
||||
- spin_lock(&pool->lock);
|
||||
+ raw_spin_lock(&pool->lock);
|
||||
/*
|
||||
* work->data is guaranteed to point to pwq only while the work
|
||||
* item is queued on pwq->wq, and both updating work->data to point
|
||||
@@ -1296,11 +1296,11 @@ static int try_to_grab_pending(struct wo
|
||||
/* work->data points to pwq iff queued, point to pool */
|
||||
set_work_pool_and_keep_pending(work, pool->id);
|
||||
|
||||
- spin_unlock(&pool->lock);
|
||||
+ raw_spin_unlock(&pool->lock);
|
||||
rcu_read_unlock();
|
||||
return 1;
|
||||
}
|
||||
- spin_unlock(&pool->lock);
|
||||
+ raw_spin_unlock(&pool->lock);
|
||||
fail:
|
||||
rcu_read_unlock();
|
||||
local_irq_restore(*flags);
|
||||
@@ -1321,7 +1321,7 @@ static int try_to_grab_pending(struct wo
|
||||
* work_struct flags.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
|
||||
struct list_head *head, unsigned int extra_flags)
|
||||
@@ -1438,7 +1438,7 @@ static void __queue_work(int cpu, struct
|
||||
if (last_pool && last_pool != pwq->pool) {
|
||||
struct worker *worker;
|
||||
|
||||
- spin_lock(&last_pool->lock);
|
||||
+ raw_spin_lock(&last_pool->lock);
|
||||
|
||||
worker = find_worker_executing_work(last_pool, work);
|
||||
|
||||
@@ -1446,11 +1446,11 @@ static void __queue_work(int cpu, struct
|
||||
pwq = worker->current_pwq;
|
||||
} else {
|
||||
/* meh... not running there, queue here */
|
||||
- spin_unlock(&last_pool->lock);
|
||||
- spin_lock(&pwq->pool->lock);
|
||||
+ raw_spin_unlock(&last_pool->lock);
|
||||
+ raw_spin_lock(&pwq->pool->lock);
|
||||
}
|
||||
} else {
|
||||
- spin_lock(&pwq->pool->lock);
|
||||
+ raw_spin_lock(&pwq->pool->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1463,7 +1463,7 @@ static void __queue_work(int cpu, struct
|
||||
*/
|
||||
if (unlikely(!pwq->refcnt)) {
|
||||
if (wq->flags & WQ_UNBOUND) {
|
||||
- spin_unlock(&pwq->pool->lock);
|
||||
+ raw_spin_unlock(&pwq->pool->lock);
|
||||
cpu_relax();
|
||||
goto retry;
|
||||
}
|
||||
@@ -1495,7 +1495,7 @@ static void __queue_work(int cpu, struct
|
||||
insert_work(pwq, work, worklist, work_flags);
|
||||
|
||||
out:
|
||||
- spin_unlock(&pwq->pool->lock);
|
||||
+ raw_spin_unlock(&pwq->pool->lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -1766,7 +1766,7 @@ EXPORT_SYMBOL(queue_rcu_work);
|
||||
* necessary.
|
||||
*
|
||||
* LOCKING:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void worker_enter_idle(struct worker *worker)
|
||||
{
|
||||
@@ -1806,7 +1806,7 @@ static void worker_enter_idle(struct wor
|
||||
* @worker is leaving idle state. Update stats.
|
||||
*
|
||||
* LOCKING:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void worker_leave_idle(struct worker *worker)
|
||||
{
|
||||
@@ -1944,11 +1944,11 @@ static struct worker *create_worker(stru
|
||||
worker_attach_to_pool(worker, pool);
|
||||
|
||||
/* start the newly created worker */
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
worker->pool->nr_workers++;
|
||||
worker_enter_idle(worker);
|
||||
wake_up_process(worker->task);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
return worker;
|
||||
|
||||
@@ -1967,7 +1967,7 @@ static struct worker *create_worker(stru
|
||||
* be idle.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void destroy_worker(struct worker *worker)
|
||||
{
|
||||
@@ -1993,7 +1993,7 @@ static void idle_worker_timeout(struct t
|
||||
{
|
||||
struct worker_pool *pool = from_timer(pool, t, idle_timer);
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
while (too_many_workers(pool)) {
|
||||
struct worker *worker;
|
||||
@@ -2011,7 +2011,7 @@ static void idle_worker_timeout(struct t
|
||||
destroy_worker(worker);
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
static void send_mayday(struct work_struct *work)
|
||||
@@ -2042,8 +2042,8 @@ static void pool_mayday_timeout(struct t
|
||||
struct worker_pool *pool = from_timer(pool, t, mayday_timer);
|
||||
struct work_struct *work;
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
- spin_lock(&wq_mayday_lock); /* for wq->maydays */
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */
|
||||
|
||||
if (need_to_create_worker(pool)) {
|
||||
/*
|
||||
@@ -2056,8 +2056,8 @@ static void pool_mayday_timeout(struct t
|
||||
send_mayday(work);
|
||||
}
|
||||
|
||||
- spin_unlock(&wq_mayday_lock);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock(&wq_mayday_lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
|
||||
}
|
||||
@@ -2076,7 +2076,7 @@ static void pool_mayday_timeout(struct t
|
||||
* may_start_working() %true.
|
||||
*
|
||||
* LOCKING:
|
||||
- * spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
* multiple times. Does GFP_KERNEL allocations. Called only from
|
||||
* manager.
|
||||
*/
|
||||
@@ -2085,7 +2085,7 @@ static void maybe_create_worker(struct w
|
||||
__acquires(&pool->lock)
|
||||
{
|
||||
restart:
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
|
||||
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
|
||||
@@ -2101,7 +2101,7 @@ static void maybe_create_worker(struct w
|
||||
}
|
||||
|
||||
del_timer_sync(&pool->mayday_timer);
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
/*
|
||||
* This is necessary even after a new worker was just successfully
|
||||
* created as @pool->lock was dropped and the new worker might have
|
||||
@@ -2124,7 +2124,7 @@ static void maybe_create_worker(struct w
|
||||
* and may_start_working() is true.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
* multiple times. Does GFP_KERNEL allocations.
|
||||
*
|
||||
* Return:
|
||||
@@ -2163,7 +2163,7 @@ static bool manage_workers(struct worker
|
||||
* call this function to process a work.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock) which is released and regrabbed.
|
||||
+ * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
|
||||
*/
|
||||
static void process_one_work(struct worker *worker, struct work_struct *work)
|
||||
__releases(&pool->lock)
|
||||
@@ -2245,7 +2245,7 @@ static void process_one_work(struct work
|
||||
*/
|
||||
set_work_pool_and_clear_pending(work, pool->id);
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
lock_map_acquire(&pwq->wq->lockdep_map);
|
||||
lock_map_acquire(&lockdep_map);
|
||||
@@ -2300,7 +2300,7 @@ static void process_one_work(struct work
|
||||
*/
|
||||
cond_resched();
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
/* clear cpu intensive status */
|
||||
if (unlikely(cpu_intensive))
|
||||
@@ -2326,7 +2326,7 @@ static void process_one_work(struct work
|
||||
* fetches a work from the top and executes it.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
+ * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
|
||||
* multiple times.
|
||||
*/
|
||||
static void process_scheduled_works(struct worker *worker)
|
||||
@@ -2368,11 +2368,11 @@ static int worker_thread(void *__worker)
|
||||
/* tell the scheduler that this is a workqueue worker */
|
||||
set_pf_worker(true);
|
||||
woke_up:
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
/* am I supposed to die? */
|
||||
if (unlikely(worker->flags & WORKER_DIE)) {
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
WARN_ON_ONCE(!list_empty(&worker->entry));
|
||||
set_pf_worker(false);
|
||||
|
||||
@@ -2438,7 +2438,7 @@ static int worker_thread(void *__worker)
|
||||
*/
|
||||
worker_enter_idle(worker);
|
||||
__set_current_state(TASK_IDLE);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
schedule();
|
||||
goto woke_up;
|
||||
}
|
||||
@@ -2492,7 +2492,7 @@ static int rescuer_thread(void *__rescue
|
||||
should_stop = kthread_should_stop();
|
||||
|
||||
/* see whether any pwq is asking for help */
|
||||
- spin_lock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_lock_irq(&wq_mayday_lock);
|
||||
|
||||
while (!list_empty(&wq->maydays)) {
|
||||
struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
|
||||
@@ -2504,11 +2504,11 @@ static int rescuer_thread(void *__rescue
|
||||
__set_current_state(TASK_RUNNING);
|
||||
list_del_init(&pwq->mayday_node);
|
||||
|
||||
- spin_unlock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_unlock_irq(&wq_mayday_lock);
|
||||
|
||||
worker_attach_to_pool(rescuer, pool);
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
/*
|
||||
* Slurp in all works issued via this workqueue and
|
||||
@@ -2537,7 +2537,7 @@ static int rescuer_thread(void *__rescue
|
||||
* incur MAYDAY_INTERVAL delay inbetween.
|
||||
*/
|
||||
if (need_to_create_worker(pool)) {
|
||||
- spin_lock(&wq_mayday_lock);
|
||||
+ raw_spin_lock(&wq_mayday_lock);
|
||||
/*
|
||||
* Queue iff we aren't racing destruction
|
||||
* and somebody else hasn't queued it already.
|
||||
@@ -2546,7 +2546,7 @@ static int rescuer_thread(void *__rescue
|
||||
get_pwq(pwq);
|
||||
list_add_tail(&pwq->mayday_node, &wq->maydays);
|
||||
}
|
||||
- spin_unlock(&wq_mayday_lock);
|
||||
+ raw_spin_unlock(&wq_mayday_lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2564,14 +2564,14 @@ static int rescuer_thread(void *__rescue
|
||||
if (need_more_worker(pool))
|
||||
wake_up_worker(pool);
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
worker_detach_from_pool(rescuer);
|
||||
|
||||
- spin_lock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_lock_irq(&wq_mayday_lock);
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_unlock_irq(&wq_mayday_lock);
|
||||
|
||||
if (should_stop) {
|
||||
__set_current_state(TASK_RUNNING);
|
||||
@@ -2651,7 +2651,7 @@ static void wq_barrier_func(struct work_
|
||||
* underneath us, so we can't reliably determine pwq from @target.
|
||||
*
|
||||
* CONTEXT:
|
||||
- * spin_lock_irq(pool->lock).
|
||||
+ * raw_spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void insert_wq_barrier(struct pool_workqueue *pwq,
|
||||
struct wq_barrier *barr,
|
||||
@@ -2738,7 +2738,7 @@ static bool flush_workqueue_prep_pwqs(st
|
||||
for_each_pwq(pwq, wq) {
|
||||
struct worker_pool *pool = pwq->pool;
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
if (flush_color >= 0) {
|
||||
WARN_ON_ONCE(pwq->flush_color != -1);
|
||||
@@ -2755,7 +2755,7 @@ static bool flush_workqueue_prep_pwqs(st
|
||||
pwq->work_color = work_color;
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
|
||||
@@ -2955,9 +2955,9 @@ void drain_workqueue(struct workqueue_st
|
||||
for_each_pwq(pwq, wq) {
|
||||
bool drained;
|
||||
|
||||
- spin_lock_irq(&pwq->pool->lock);
|
||||
+ raw_spin_lock_irq(&pwq->pool->lock);
|
||||
drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
|
||||
- spin_unlock_irq(&pwq->pool->lock);
|
||||
+ raw_spin_unlock_irq(&pwq->pool->lock);
|
||||
|
||||
if (drained)
|
||||
continue;
|
||||
@@ -2993,7 +2993,7 @@ static bool start_flush_work(struct work
|
||||
return false;
|
||||
}
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
/* see the comment in try_to_grab_pending() with the same code */
|
||||
pwq = get_work_pwq(work);
|
||||
if (pwq) {
|
||||
@@ -3009,7 +3009,7 @@ static bool start_flush_work(struct work
|
||||
check_flush_dependency(pwq->wq, work);
|
||||
|
||||
insert_wq_barrier(pwq, barr, work, worker);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
/*
|
||||
* Force a lock recursion deadlock when using flush_work() inside a
|
||||
@@ -3028,7 +3028,7 @@ static bool start_flush_work(struct work
|
||||
rcu_read_unlock();
|
||||
return true;
|
||||
already_gone:
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
rcu_read_unlock();
|
||||
return false;
|
||||
}
|
||||
@@ -3421,7 +3421,7 @@ static bool wqattrs_equal(const struct w
|
||||
*/
|
||||
static int init_worker_pool(struct worker_pool *pool)
|
||||
{
|
||||
- spin_lock_init(&pool->lock);
|
||||
+ raw_spin_lock_init(&pool->lock);
|
||||
pool->id = -1;
|
||||
pool->cpu = -1;
|
||||
pool->node = NUMA_NO_NODE;
|
||||
@@ -3547,7 +3547,7 @@ static void put_unbound_pool(struct work
|
||||
* @pool's workers from blocking on attach_mutex. We're the last
|
||||
* manager and @pool gets freed with the flag set.
|
||||
*/
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
swait_event_lock_irq(wq_manager_wait,
|
||||
!(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
|
||||
pool->flags |= POOL_MANAGER_ACTIVE;
|
||||
@@ -3555,7 +3555,7 @@ static void put_unbound_pool(struct work
|
||||
while ((worker = first_idle_worker(pool)))
|
||||
destroy_worker(worker);
|
||||
WARN_ON(pool->nr_workers || pool->nr_idle);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
|
||||
mutex_lock(&wq_pool_attach_mutex);
|
||||
if (!list_empty(&pool->workers))
|
||||
@@ -3711,7 +3711,7 @@ static void pwq_adjust_max_active(struct
|
||||
return;
|
||||
|
||||
/* this function can be called during early boot w/ irq disabled */
|
||||
- spin_lock_irqsave(&pwq->pool->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
|
||||
|
||||
/*
|
||||
* During [un]freezing, the caller is responsible for ensuring that
|
||||
@@ -3734,7 +3734,7 @@ static void pwq_adjust_max_active(struct
|
||||
pwq->max_active = 0;
|
||||
}
|
||||
|
||||
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
|
||||
}
|
||||
|
||||
/* initialize newly alloced @pwq which is associated with @wq and @pool */
|
||||
@@ -4136,9 +4136,9 @@ static void wq_update_unbound_numa(struc
|
||||
|
||||
use_dfl_pwq:
|
||||
mutex_lock(&wq->mutex);
|
||||
- spin_lock_irq(&wq->dfl_pwq->pool->lock);
|
||||
+ raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
|
||||
get_pwq(wq->dfl_pwq);
|
||||
- spin_unlock_irq(&wq->dfl_pwq->pool->lock);
|
||||
+ raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
|
||||
old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
|
||||
out_unlock:
|
||||
mutex_unlock(&wq->mutex);
|
||||
@@ -4351,9 +4351,9 @@ void destroy_workqueue(struct workqueue_
|
||||
struct worker *rescuer = wq->rescuer;
|
||||
|
||||
/* this prevents new queueing */
|
||||
- spin_lock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_lock_irq(&wq_mayday_lock);
|
||||
wq->rescuer = NULL;
|
||||
- spin_unlock_irq(&wq_mayday_lock);
|
||||
+ raw_spin_unlock_irq(&wq_mayday_lock);
|
||||
|
||||
/* rescuer will empty maydays list before exiting */
|
||||
kthread_stop(rescuer->task);
|
||||
@@ -4549,10 +4549,10 @@ unsigned int work_busy(struct work_struc
|
||||
rcu_read_lock();
|
||||
pool = get_work_pool(work);
|
||||
if (pool) {
|
||||
- spin_lock_irqsave(&pool->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&pool->lock, flags);
|
||||
if (find_worker_executing_work(pool, work))
|
||||
ret |= WORK_BUSY_RUNNING;
|
||||
- spin_unlock_irqrestore(&pool->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -4759,10 +4759,10 @@ void show_workqueue_state(void)
|
||||
pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
|
||||
|
||||
for_each_pwq(pwq, wq) {
|
||||
- spin_lock_irqsave(&pwq->pool->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&pwq->pool->lock, flags);
|
||||
if (pwq->nr_active || !list_empty(&pwq->delayed_works))
|
||||
show_pwq(pwq);
|
||||
- spin_unlock_irqrestore(&pwq->pool->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
|
||||
/*
|
||||
* We could be printing a lot from atomic context, e.g.
|
||||
* sysrq-t -> show_workqueue_state(). Avoid triggering
|
||||
@@ -4776,7 +4776,7 @@ void show_workqueue_state(void)
|
||||
struct worker *worker;
|
||||
bool first = true;
|
||||
|
||||
- spin_lock_irqsave(&pool->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&pool->lock, flags);
|
||||
if (pool->nr_workers == pool->nr_idle)
|
||||
goto next_pool;
|
||||
|
||||
@@ -4795,7 +4795,7 @@ void show_workqueue_state(void)
|
||||
}
|
||||
pr_cont("\n");
|
||||
next_pool:
|
||||
- spin_unlock_irqrestore(&pool->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&pool->lock, flags);
|
||||
/*
|
||||
* We could be printing a lot from atomic context, e.g.
|
||||
* sysrq-t -> show_workqueue_state(). Avoid triggering
|
||||
@@ -4825,7 +4825,7 @@ void wq_worker_comm(char *buf, size_t si
|
||||
struct worker_pool *pool = worker->pool;
|
||||
|
||||
if (pool) {
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
/*
|
||||
* ->desc tracks information (wq name or
|
||||
* set_worker_desc()) for the latest execution. If
|
||||
@@ -4839,7 +4839,7 @@ void wq_worker_comm(char *buf, size_t si
|
||||
scnprintf(buf + off, size - off, "-%s",
|
||||
worker->desc);
|
||||
}
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4870,7 +4870,7 @@ static void unbind_workers(int cpu)
|
||||
|
||||
for_each_cpu_worker_pool(pool, cpu) {
|
||||
mutex_lock(&wq_pool_attach_mutex);
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
/*
|
||||
* We've blocked all attach/detach operations. Make all workers
|
||||
@@ -4884,7 +4884,7 @@ static void unbind_workers(int cpu)
|
||||
|
||||
pool->flags |= POOL_DISASSOCIATED;
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
mutex_unlock(&wq_pool_attach_mutex);
|
||||
|
||||
/*
|
||||
@@ -4910,9 +4910,9 @@ static void unbind_workers(int cpu)
|
||||
* worker blocking could lead to lengthy stalls. Kick off
|
||||
* unbound chain execution of currently pending work items.
|
||||
*/
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
wake_up_worker(pool);
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4939,7 +4939,7 @@ static void rebind_workers(struct worker
|
||||
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
|
||||
pool->attrs->cpumask) < 0);
|
||||
|
||||
- spin_lock_irq(&pool->lock);
|
||||
+ raw_spin_lock_irq(&pool->lock);
|
||||
|
||||
pool->flags &= ~POOL_DISASSOCIATED;
|
||||
|
||||
@@ -4978,7 +4978,7 @@ static void rebind_workers(struct worker
|
||||
WRITE_ONCE(worker->flags, worker_flags);
|
||||
}
|
||||
|
||||
- spin_unlock_irq(&pool->lock);
|
||||
+ raw_spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
/**
|
@ -0,0 +1,116 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 15 Aug 2019 18:14:16 +0200
|
||||
Subject: [PATCH 1/4] cgroup: Remove ->css_rstat_flush()
|
||||
|
||||
I was looking at the lifetime of the ->css_rstat_flush() callback to see if
|
||||
cgroup_rstat_cpu_lock should remain a raw_spinlock_t. I didn't find any
|
||||
users; the callback has been unused since it was introduced in commit
|
||||
8f53470bab042 ("cgroup: Add cgroup_subsys->css_rstat_flush()")
|
||||
|
||||
Remove the css_rstat_flush callback because it has no users.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/cgroup-defs.h | 5 -----
|
||||
kernel/cgroup/cgroup.c | 12 ------------
|
||||
kernel/cgroup/rstat.c | 10 +---------
|
||||
3 files changed, 1 insertion(+), 26 deletions(-)
|
||||
|
||||
--- a/include/linux/cgroup-defs.h
|
||||
+++ b/include/linux/cgroup-defs.h
|
||||
@@ -144,9 +144,6 @@ struct cgroup_subsys_state {
|
||||
struct list_head sibling;
|
||||
struct list_head children;
|
||||
|
||||
- /* flush target list anchored at cgrp->rstat_css_list */
|
||||
- struct list_head rstat_css_node;
|
||||
-
|
||||
/*
|
||||
* PI: Subsys-unique ID. 0 is unused and root is always 1. The
|
||||
* matching css can be looked up using css_from_id().
|
||||
@@ -455,7 +452,6 @@ struct cgroup {
|
||||
|
||||
/* per-cpu recursive resource statistics */
|
||||
struct cgroup_rstat_cpu __percpu *rstat_cpu;
|
||||
- struct list_head rstat_css_list;
|
||||
|
||||
/* cgroup basic resource statistics */
|
||||
struct cgroup_base_stat pending_bstat; /* pending from children */
|
||||
@@ -633,7 +629,6 @@ struct cgroup_subsys {
|
||||
void (*css_released)(struct cgroup_subsys_state *css);
|
||||
void (*css_free)(struct cgroup_subsys_state *css);
|
||||
void (*css_reset)(struct cgroup_subsys_state *css);
|
||||
- void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
|
||||
int (*css_extra_stat_show)(struct seq_file *seq,
|
||||
struct cgroup_subsys_state *css);
|
||||
|
||||
--- a/kernel/cgroup/cgroup.c
|
||||
+++ b/kernel/cgroup/cgroup.c
|
||||
@@ -1957,7 +1957,6 @@ static void init_cgroup_housekeeping(str
|
||||
cgrp->dom_cgrp = cgrp;
|
||||
cgrp->max_descendants = INT_MAX;
|
||||
cgrp->max_depth = INT_MAX;
|
||||
- INIT_LIST_HEAD(&cgrp->rstat_css_list);
|
||||
prev_cputime_init(&cgrp->prev_cputime);
|
||||
|
||||
for_each_subsys(ss, ssid)
|
||||
@@ -5027,12 +5026,6 @@ static void css_release_work_fn(struct w
|
||||
list_del_rcu(&css->sibling);
|
||||
|
||||
if (ss) {
|
||||
- /* css release path */
|
||||
- if (!list_empty(&css->rstat_css_node)) {
|
||||
- cgroup_rstat_flush(cgrp);
|
||||
- list_del_rcu(&css->rstat_css_node);
|
||||
- }
|
||||
-
|
||||
cgroup_idr_replace(&ss->css_idr, NULL, css->id);
|
||||
if (ss->css_released)
|
||||
ss->css_released(css);
|
||||
@@ -5094,7 +5087,6 @@ static void init_and_link_css(struct cgr
|
||||
css->id = -1;
|
||||
INIT_LIST_HEAD(&css->sibling);
|
||||
INIT_LIST_HEAD(&css->children);
|
||||
- INIT_LIST_HEAD(&css->rstat_css_node);
|
||||
css->serial_nr = css_serial_nr_next++;
|
||||
atomic_set(&css->online_cnt, 0);
|
||||
|
||||
@@ -5103,9 +5095,6 @@ static void init_and_link_css(struct cgr
|
||||
css_get(css->parent);
|
||||
}
|
||||
|
||||
- if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
|
||||
- list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
|
||||
-
|
||||
BUG_ON(cgroup_css(cgrp, ss));
|
||||
}
|
||||
|
||||
@@ -5207,7 +5196,6 @@ static struct cgroup_subsys_state *css_c
|
||||
err_list_del:
|
||||
list_del_rcu(&css->sibling);
|
||||
err_free_css:
|
||||
- list_del_rcu(&css->rstat_css_node);
|
||||
INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
|
||||
queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
|
||||
return ERR_PTR(err);
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -162,17 +162,9 @@ static void cgroup_rstat_flush_locked(st
|
||||
struct cgroup *pos = NULL;
|
||||
|
||||
raw_spin_lock(cpu_lock);
|
||||
- while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) {
|
||||
- struct cgroup_subsys_state *css;
|
||||
-
|
||||
+ while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu)))
|
||||
cgroup_base_stat_flush(pos, cpu);
|
||||
|
||||
- rcu_read_lock();
|
||||
- list_for_each_entry_rcu(css, &pos->rstat_css_list,
|
||||
- rstat_css_node)
|
||||
- css->ss->css_rstat_flush(css, cpu);
|
||||
- rcu_read_unlock();
|
||||
- }
|
||||
raw_spin_unlock(cpu_lock);
|
||||
|
||||
/* if @may_sleep, play nice and yield if necessary */
|
@ -0,0 +1,68 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 16 Aug 2019 12:20:42 +0200
|
||||
Subject: [PATCH 2/4] cgroup: Consolidate users of cgroup_rstat_lock.
|
||||
|
||||
cgroup_rstat_flush_irqsafe() has no users, remove it.
|
||||
cgroup_rstat_flush_hold() and cgroup_rstat_flush_release() are only used within
|
||||
this file. Make them static.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/cgroup.h | 3 ---
|
||||
kernel/cgroup/rstat.c | 19 ++-----------------
|
||||
2 files changed, 2 insertions(+), 20 deletions(-)
|
||||
|
||||
--- a/include/linux/cgroup.h
|
||||
+++ b/include/linux/cgroup.h
|
||||
@@ -751,9 +751,6 @@ static inline void cgroup_path_from_kern
|
||||
*/
|
||||
void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
|
||||
void cgroup_rstat_flush(struct cgroup *cgrp);
|
||||
-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp);
|
||||
-void cgroup_rstat_flush_hold(struct cgroup *cgrp);
|
||||
-void cgroup_rstat_flush_release(void);
|
||||
|
||||
/*
|
||||
* Basic resource stats.
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -201,21 +201,6 @@ void cgroup_rstat_flush(struct cgroup *c
|
||||
}
|
||||
|
||||
/**
|
||||
- * cgroup_rstat_flush_irqsafe - irqsafe version of cgroup_rstat_flush()
|
||||
- * @cgrp: target cgroup
|
||||
- *
|
||||
- * This function can be called from any context.
|
||||
- */
|
||||
-void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
|
||||
-{
|
||||
- unsigned long flags;
|
||||
-
|
||||
- spin_lock_irqsave(&cgroup_rstat_lock, flags);
|
||||
- cgroup_rstat_flush_locked(cgrp, false);
|
||||
- spin_unlock_irqrestore(&cgroup_rstat_lock, flags);
|
||||
-}
|
||||
-
|
||||
-/**
|
||||
* cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold
|
||||
* @cgrp: target cgroup
|
||||
*
|
||||
@@ -224,7 +209,7 @@ void cgroup_rstat_flush_irqsafe(struct c
|
||||
*
|
||||
* This function may block.
|
||||
*/
|
||||
-void cgroup_rstat_flush_hold(struct cgroup *cgrp)
|
||||
+static void cgroup_rstat_flush_hold(struct cgroup *cgrp)
|
||||
__acquires(&cgroup_rstat_lock)
|
||||
{
|
||||
might_sleep();
|
||||
@@ -235,7 +220,7 @@ void cgroup_rstat_flush_hold(struct cgro
|
||||
/**
|
||||
* cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
|
||||
*/
|
||||
-void cgroup_rstat_flush_release(void)
|
||||
+static void cgroup_rstat_flush_release(void)
|
||||
__releases(&cgroup_rstat_lock)
|
||||
{
|
||||
spin_unlock_irq(&cgroup_rstat_lock);
|
@ -0,0 +1,55 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 16 Aug 2019 12:25:35 +0200
|
||||
Subject: [PATCH 3/4] cgroup: Remove `may_sleep' from
|
||||
cgroup_rstat_flush_locked()
|
||||
|
||||
cgroup_rstat_flush_locked() is always invoked with `may_sleep' set to
|
||||
true, so this case can be made the default and the parameter removed.
|
||||
|
||||
Remove the `may_sleep' parameter.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cgroup/rstat.c | 10 ++++------
|
||||
1 file changed, 4 insertions(+), 6 deletions(-)
|
||||
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -149,7 +149,7 @@ static struct cgroup *cgroup_rstat_cpu_p
|
||||
}
|
||||
|
||||
/* see cgroup_rstat_flush() */
|
||||
-static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep)
|
||||
+static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
|
||||
__releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
|
||||
{
|
||||
int cpu;
|
||||
@@ -167,9 +167,7 @@ static void cgroup_rstat_flush_locked(st
|
||||
|
||||
raw_spin_unlock(cpu_lock);
|
||||
|
||||
- /* if @may_sleep, play nice and yield if necessary */
|
||||
- if (may_sleep && (need_resched() ||
|
||||
- spin_needbreak(&cgroup_rstat_lock))) {
|
||||
+ if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
|
||||
spin_unlock_irq(&cgroup_rstat_lock);
|
||||
if (!cond_resched())
|
||||
cpu_relax();
|
||||
@@ -196,7 +194,7 @@ void cgroup_rstat_flush(struct cgroup *c
|
||||
might_sleep();
|
||||
|
||||
spin_lock_irq(&cgroup_rstat_lock);
|
||||
- cgroup_rstat_flush_locked(cgrp, true);
|
||||
+ cgroup_rstat_flush_locked(cgrp);
|
||||
spin_unlock_irq(&cgroup_rstat_lock);
|
||||
}
|
||||
|
||||
@@ -214,7 +212,7 @@ static void cgroup_rstat_flush_hold(stru
|
||||
{
|
||||
might_sleep();
|
||||
spin_lock_irq(&cgroup_rstat_lock);
|
||||
- cgroup_rstat_flush_locked(cgrp, true);
|
||||
+ cgroup_rstat_flush_locked(cgrp);
|
||||
}
|
||||
|
||||
/**
|
@ -0,0 +1,71 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 16 Aug 2019 12:49:36 +0200
|
||||
Subject: [PATCH 4/4] cgroup: Acquire cgroup_rstat_lock with enabled interrupts
|
||||
|
||||
There is no need to disable interrupts while cgroup_rstat_lock is
|
||||
acquired. The lock is never used in-IRQ context so a simple spin_lock()
|
||||
is enough for synchronisation purposes.
|
||||
|
||||
Acquire cgroup_rstat_lock without disabling interrupts and ensure that
|
||||
cgroup_rstat_cpu_lock is acquired with disabled interrupts (this one is
|
||||
acquired in-IRQ context).
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cgroup/rstat.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
--- a/kernel/cgroup/rstat.c
|
||||
+++ b/kernel/cgroup/rstat.c
|
||||
@@ -161,17 +161,17 @@ static void cgroup_rstat_flush_locked(st
|
||||
cpu);
|
||||
struct cgroup *pos = NULL;
|
||||
|
||||
- raw_spin_lock(cpu_lock);
|
||||
+ raw_spin_lock_irq(cpu_lock);
|
||||
while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu)))
|
||||
cgroup_base_stat_flush(pos, cpu);
|
||||
|
||||
- raw_spin_unlock(cpu_lock);
|
||||
+ raw_spin_unlock_irq(cpu_lock);
|
||||
|
||||
if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
|
||||
- spin_unlock_irq(&cgroup_rstat_lock);
|
||||
+ spin_unlock(&cgroup_rstat_lock);
|
||||
if (!cond_resched())
|
||||
cpu_relax();
|
||||
- spin_lock_irq(&cgroup_rstat_lock);
|
||||
+ spin_lock(&cgroup_rstat_lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,9 +193,9 @@ void cgroup_rstat_flush(struct cgroup *c
|
||||
{
|
||||
might_sleep();
|
||||
|
||||
- spin_lock_irq(&cgroup_rstat_lock);
|
||||
+ spin_lock(&cgroup_rstat_lock);
|
||||
cgroup_rstat_flush_locked(cgrp);
|
||||
- spin_unlock_irq(&cgroup_rstat_lock);
|
||||
+ spin_unlock(&cgroup_rstat_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -211,7 +211,7 @@ static void cgroup_rstat_flush_hold(stru
|
||||
__acquires(&cgroup_rstat_lock)
|
||||
{
|
||||
might_sleep();
|
||||
- spin_lock_irq(&cgroup_rstat_lock);
|
||||
+ spin_lock(&cgroup_rstat_lock);
|
||||
cgroup_rstat_flush_locked(cgrp);
|
||||
}
|
||||
|
||||
@@ -221,7 +221,7 @@ static void cgroup_rstat_flush_hold(stru
|
||||
static void cgroup_rstat_flush_release(void)
|
||||
__releases(&cgroup_rstat_lock)
|
||||
{
|
||||
- spin_unlock_irq(&cgroup_rstat_lock);
|
||||
+ spin_unlock(&cgroup_rstat_lock);
|
||||
}
|
||||
|
||||
int cgroup_rstat_init(struct cgroup *cgrp)
|
@ -0,0 +1,41 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 11 Feb 2019 10:40:46 +0100
|
||||
Subject: [PATCH] mm: workingset: replace IRQ-off check with a lockdep assert.
|
||||
|
||||
Commit
|
||||
|
||||
68d48e6a2df57 ("mm: workingset: add vmstat counter for shadow nodes")
|
||||
|
||||
introduced an IRQ-off check to ensure that a lock is held which also
|
||||
disables interrupts. This does not work the same way on -RT because none
|
||||
of the locks that are held disable interrupts.
|
||||
Replace this check with a lockdep assert which ensures that the lock is
|
||||
held.
|
||||
|
||||
Cc: Peter Zijlstra <peterz@infradead.org>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
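The substitution in isolation, sketched with a hypothetical structure: assert the lock itself rather than one of its side effects, because on -RT holding the lock does not imply that interrupts are disabled.

#include <linux/lockdep.h>
#include <linux/spinlock.h>

struct my_obj {
        spinlock_t lock;
        int shadow_count;
};

static void my_update_node(struct my_obj *obj)
{
        /* was: VM_WARN_ON_ONCE(!irqs_disabled()); */
        lockdep_assert_held(&obj->lock);

        obj->shadow_count++;            /* state protected by obj->lock */
}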
---
|
||||
mm/workingset.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/mm/workingset.c
|
||||
+++ b/mm/workingset.c
|
||||
@@ -367,6 +367,8 @@ static struct list_lru shadow_nodes;
|
||||
|
||||
void workingset_update_node(struct xa_node *node)
|
||||
{
|
||||
+ struct address_space *mapping;
|
||||
+
|
||||
/*
|
||||
* Track non-empty nodes that contain only shadow entries;
|
||||
* unlink those that contain pages or are being freed.
|
||||
@@ -375,7 +377,8 @@ void workingset_update_node(struct xa_no
|
||||
* already where they should be. The list_empty() test is safe
|
||||
* as node->private_list is protected by the i_pages lock.
|
||||
*/
|
||||
- VM_WARN_ON_ONCE(!irqs_disabled()); /* For __inc_lruvec_page_state */
|
||||
+ mapping = container_of(node->array, struct address_space, i_pages);
|
||||
+ lockdep_assert_held(&mapping->i_pages.xa_lock);
|
||||
|
||||
if (node->count && node->count == node->nr_values) {
|
||||
if (list_empty(&node->private_list)) {
|
@@ -0,0 +1,28 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Mon, 11 Feb 2019 11:33:11 +0100
|
||||
Subject: [PATCH] tpm: remove tpm_dev_wq_lock
|
||||
|
||||
Added in commit
|
||||
|
||||
9e1b74a63f776 ("tpm: add support for nonblocking operation")
|
||||
|
||||
but never actually used.
|
||||
|
||||
Cc: Philip Tricca <philip.b.tricca@intel.com>
|
||||
Cc: Tadeusz Struk <tadeusz.struk@intel.com>
|
||||
Cc: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/char/tpm/tpm-dev-common.c | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
--- a/drivers/char/tpm/tpm-dev-common.c
|
||||
+++ b/drivers/char/tpm/tpm-dev-common.c
|
||||
@@ -20,7 +20,6 @@
|
||||
#include "tpm-dev.h"
|
||||
|
||||
static struct workqueue_struct *tpm_dev_wq;
|
||||
-static DEFINE_MUTEX(tpm_dev_wq_lock);
|
||||
|
||||
static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space,
|
||||
u8 *buf, size_t bufsiz)
|
@@ -0,0 +1,314 @@
|
||||
From: Rob Herring <robh@kernel.org>
|
||||
Date: Wed, 11 Dec 2019 17:23:45 -0600
|
||||
Subject: [PATCH] of: Rework and simplify phandle cache to use a fixed size
|
||||
|
||||
The phandle cache was added to speed up of_find_node_by_phandle() by
|
||||
avoiding walking the whole DT to find a matching phandle. The
|
||||
implementation has several shortcomings:
|
||||
|
||||
- The cache is designed to work on a linear set of phandle values.
|
||||
This is true for dtc generated DTs, but not for other cases such as
|
||||
Power.
|
||||
- The cache isn't enabled until of_core_init() and a typical system
|
||||
may see hundreds of calls to of_find_node_by_phandle() before that
|
||||
point.
|
||||
- The cache is freed and re-allocated when the number of phandles
|
||||
changes.
|
||||
- It takes a raw spinlock around a memory allocation which breaks on
|
||||
RT.
|
||||
|
||||
Change the implementation to a fixed size and use hash_32() as the
|
||||
cache index. This greatly simplifies the implementation. It avoids
|
||||
the need for any re-alloc of the cache and taking a reference on nodes
|
||||
in the cache. We only have a single source of removing cache entries
|
||||
which is of_detach_node().
|
||||
|
||||
Using hash_32() removes any assumption on phandle values, improving
the hit rate for non-linear phandle values. The effect on linear values
using hash_32() is about a 10% collision rate. The chances of thrashing on
colliding values seem to be low.
|
||||
|
||||
To compare performance, I used a RK3399 board which is a pretty typical
|
||||
system. I found that just measuring boot time as done previously is
|
||||
noisy and may be impacted by other things. Also bringing up secondary
|
||||
cores causes some issues with measuring, so I booted with 'nr_cpus=1'.
|
||||
With no caching, calls to of_find_node_by_phandle() take about 20124 us
|
||||
for 1248 calls. There's an additional 288 calls before time keeping is
|
||||
up. Using the average time per hit/miss with the cache, we can calculate
|
||||
these calls to take 690 us (277 hit / 11 miss) with a 128 entry cache
|
||||
and 13319 us with no cache or an uninitialized cache.
|
||||
|
||||
Comparing the 3 implementations the time spent in
|
||||
of_find_node_by_phandle() is:
|
||||
|
||||
no cache: 20124 us (+ 13319 us)
|
||||
128 entry cache: 5134 us (+ 690 us)
|
||||
current cache: 819 us (+ 13319 us)
|
||||
|
||||
We could move the allocation of the cache earlier to improve the
|
||||
current cache, but that just further complicates the situation as it
|
||||
needs to be after slab is up, so we can't do it when unflattening (which
|
||||
uses memblock).
|
||||
|
||||
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Cc: Michael Ellerman <mpe@ellerman.id.au>
|
||||
Cc: Segher Boessenkool <segher@kernel.crashing.org>
|
||||
Cc: Frank Rowand <frowand.list@gmail.com>
|
||||
Signed-off-by: Rob Herring <robh@kernel.org>
|
||||
Link: https://lkml.kernel.org/r/20191211232345.24810-1-robh@kernel.org
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/of/base.c | 133 +++++++++---------------------------------------
|
||||
drivers/of/dynamic.c | 2
|
||||
drivers/of/of_private.h | 4 -
|
||||
drivers/of/overlay.c | 10 ---
|
||||
4 files changed, 28 insertions(+), 121 deletions(-)
|
||||
|
||||
--- a/drivers/of/base.c
|
||||
+++ b/drivers/of/base.c
|
||||
@@ -123,115 +123,38 @@ int __weak of_node_to_nid(struct device_
|
||||
}
|
||||
#endif
|
||||
|
||||
-/*
|
||||
- * Assumptions behind phandle_cache implementation:
|
||||
- * - phandle property values are in a contiguous range of 1..n
|
||||
- *
|
||||
- * If the assumptions do not hold, then
|
||||
- * - the phandle lookup overhead reduction provided by the cache
|
||||
- * will likely be less
|
||||
- */
|
||||
+#define OF_PHANDLE_CACHE_BITS 7
|
||||
+#define OF_PHANDLE_CACHE_SZ BIT(OF_PHANDLE_CACHE_BITS)
|
||||
|
||||
-static struct device_node **phandle_cache;
|
||||
-static u32 phandle_cache_mask;
|
||||
+static struct device_node *phandle_cache[OF_PHANDLE_CACHE_SZ];
|
||||
|
||||
-/*
|
||||
- * Caller must hold devtree_lock.
|
||||
- */
|
||||
-static void __of_free_phandle_cache(void)
|
||||
+static u32 of_phandle_cache_hash(phandle handle)
|
||||
{
|
||||
- u32 cache_entries = phandle_cache_mask + 1;
|
||||
- u32 k;
|
||||
-
|
||||
- if (!phandle_cache)
|
||||
- return;
|
||||
-
|
||||
- for (k = 0; k < cache_entries; k++)
|
||||
- of_node_put(phandle_cache[k]);
|
||||
-
|
||||
- kfree(phandle_cache);
|
||||
- phandle_cache = NULL;
|
||||
+ return hash_32(handle, OF_PHANDLE_CACHE_BITS);
|
||||
}
|
||||
|
||||
-int of_free_phandle_cache(void)
|
||||
-{
|
||||
- unsigned long flags;
|
||||
-
|
||||
- raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
-
|
||||
- __of_free_phandle_cache();
|
||||
-
|
||||
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-#if !defined(CONFIG_MODULES)
|
||||
-late_initcall_sync(of_free_phandle_cache);
|
||||
-#endif
|
||||
-
|
||||
/*
|
||||
* Caller must hold devtree_lock.
|
||||
*/
|
||||
-void __of_free_phandle_cache_entry(phandle handle)
|
||||
+void __of_phandle_cache_inv_entry(phandle handle)
|
||||
{
|
||||
- phandle masked_handle;
|
||||
+ u32 handle_hash;
|
||||
struct device_node *np;
|
||||
|
||||
if (!handle)
|
||||
return;
|
||||
|
||||
- masked_handle = handle & phandle_cache_mask;
|
||||
-
|
||||
- if (phandle_cache) {
|
||||
- np = phandle_cache[masked_handle];
|
||||
- if (np && handle == np->phandle) {
|
||||
- of_node_put(np);
|
||||
- phandle_cache[masked_handle] = NULL;
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-void of_populate_phandle_cache(void)
|
||||
-{
|
||||
- unsigned long flags;
|
||||
- u32 cache_entries;
|
||||
- struct device_node *np;
|
||||
- u32 phandles = 0;
|
||||
-
|
||||
- raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
-
|
||||
- __of_free_phandle_cache();
|
||||
+ handle_hash = of_phandle_cache_hash(handle);
|
||||
|
||||
- for_each_of_allnodes(np)
|
||||
- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
|
||||
- phandles++;
|
||||
-
|
||||
- if (!phandles)
|
||||
- goto out;
|
||||
-
|
||||
- cache_entries = roundup_pow_of_two(phandles);
|
||||
- phandle_cache_mask = cache_entries - 1;
|
||||
-
|
||||
- phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
|
||||
- GFP_ATOMIC);
|
||||
- if (!phandle_cache)
|
||||
- goto out;
|
||||
-
|
||||
- for_each_of_allnodes(np)
|
||||
- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL) {
|
||||
- of_node_get(np);
|
||||
- phandle_cache[np->phandle & phandle_cache_mask] = np;
|
||||
- }
|
||||
-
|
||||
-out:
|
||||
- raw_spin_unlock_irqrestore(&devtree_lock, flags);
|
||||
+ np = phandle_cache[handle_hash];
|
||||
+ if (np && handle == np->phandle)
|
||||
+ phandle_cache[handle_hash] = NULL;
|
||||
}
|
||||
|
||||
void __init of_core_init(void)
|
||||
{
|
||||
struct device_node *np;
|
||||
|
||||
- of_populate_phandle_cache();
|
||||
|
||||
/* Create the kset, and register existing nodes */
|
||||
mutex_lock(&of_mutex);
|
||||
@@ -241,8 +164,11 @@ void __init of_core_init(void)
|
||||
pr_err("failed to register existing nodes\n");
|
||||
return;
|
||||
}
|
||||
- for_each_of_allnodes(np)
|
||||
+ for_each_of_allnodes(np) {
|
||||
__of_attach_node_sysfs(np);
|
||||
+ if (np->phandle && !phandle_cache[of_phandle_cache_hash(np->phandle)])
|
||||
+ phandle_cache[of_phandle_cache_hash(np->phandle)] = np;
|
||||
+ }
|
||||
mutex_unlock(&of_mutex);
|
||||
|
||||
/* Symlink in /proc as required by userspace ABI */
|
||||
@@ -1223,36 +1149,29 @@ struct device_node *of_find_node_by_phan
|
||||
{
|
||||
struct device_node *np = NULL;
|
||||
unsigned long flags;
|
||||
- phandle masked_handle;
|
||||
+ u32 handle_hash;
|
||||
|
||||
if (!handle)
|
||||
return NULL;
|
||||
|
||||
- raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
+ handle_hash = of_phandle_cache_hash(handle);
|
||||
|
||||
- masked_handle = handle & phandle_cache_mask;
|
||||
+ raw_spin_lock_irqsave(&devtree_lock, flags);
|
||||
|
||||
- if (phandle_cache) {
|
||||
- if (phandle_cache[masked_handle] &&
|
||||
- handle == phandle_cache[masked_handle]->phandle)
|
||||
- np = phandle_cache[masked_handle];
|
||||
- if (np && of_node_check_flag(np, OF_DETACHED)) {
|
||||
- WARN_ON(1); /* did not uncache np on node removal */
|
||||
- of_node_put(np);
|
||||
- phandle_cache[masked_handle] = NULL;
|
||||
- np = NULL;
|
||||
- }
|
||||
+ if (phandle_cache[handle_hash] &&
|
||||
+ handle == phandle_cache[handle_hash]->phandle)
|
||||
+ np = phandle_cache[handle_hash];
|
||||
+ if (np && of_node_check_flag(np, OF_DETACHED)) {
|
||||
+ WARN_ON(1); /* did not uncache np on node removal */
|
||||
+ phandle_cache[handle_hash] = NULL;
|
||||
+ np = NULL;
|
||||
}
|
||||
|
||||
if (!np) {
|
||||
for_each_of_allnodes(np)
|
||||
if (np->phandle == handle &&
|
||||
!of_node_check_flag(np, OF_DETACHED)) {
|
||||
- if (phandle_cache) {
|
||||
- /* will put when removed from cache */
|
||||
- of_node_get(np);
|
||||
- phandle_cache[masked_handle] = np;
|
||||
- }
|
||||
+ phandle_cache[handle_hash] = np;
|
||||
break;
|
||||
}
|
||||
}
|
||||
--- a/drivers/of/dynamic.c
|
||||
+++ b/drivers/of/dynamic.c
|
||||
@@ -276,7 +276,7 @@ void __of_detach_node(struct device_node
|
||||
of_node_set_flag(np, OF_DETACHED);
|
||||
|
||||
/* race with of_find_node_by_phandle() prevented by devtree_lock */
|
||||
- __of_free_phandle_cache_entry(np->phandle);
|
||||
+ __of_phandle_cache_inv_entry(np->phandle);
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/drivers/of/of_private.h
|
||||
+++ b/drivers/of/of_private.h
|
||||
@@ -85,14 +85,12 @@ int of_resolve_phandles(struct device_no
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_OF_DYNAMIC)
|
||||
-void __of_free_phandle_cache_entry(phandle handle);
|
||||
+void __of_phandle_cache_inv_entry(phandle handle);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_OF_OVERLAY)
|
||||
void of_overlay_mutex_lock(void);
|
||||
void of_overlay_mutex_unlock(void);
|
||||
-int of_free_phandle_cache(void);
|
||||
-void of_populate_phandle_cache(void);
|
||||
#else
|
||||
static inline void of_overlay_mutex_lock(void) {};
|
||||
static inline void of_overlay_mutex_unlock(void) {};
|
||||
--- a/drivers/of/overlay.c
|
||||
+++ b/drivers/of/overlay.c
|
||||
@@ -974,8 +974,6 @@ static int of_overlay_apply(const void *
|
||||
goto err_free_overlay_changeset;
|
||||
}
|
||||
|
||||
- of_populate_phandle_cache();
|
||||
-
|
||||
ret = __of_changeset_apply_notify(&ovcs->cset);
|
||||
if (ret)
|
||||
pr_err("overlay apply changeset entry notify error %d\n", ret);
|
||||
@@ -1218,17 +1216,9 @@ int of_overlay_remove(int *ovcs_id)
|
||||
|
||||
list_del(&ovcs->ovcs_list);
|
||||
|
||||
- /*
|
||||
- * Disable phandle cache. Avoids race condition that would arise
|
||||
- * from removing cache entry when the associated node is deleted.
|
||||
- */
|
||||
- of_free_phandle_cache();
|
||||
-
|
||||
ret_apply = 0;
|
||||
ret = __of_changeset_revert_entries(&ovcs->cset, &ret_apply);
|
||||
|
||||
- of_populate_phandle_cache();
|
||||
-
|
||||
if (ret) {
|
||||
if (ret_apply)
|
||||
devicetree_state_flags |= DTSF_REVERT_FAIL;
|
@@ -0,0 +1,43 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Fri, 6 Mar 2020 15:59:06 +0100
|
||||
Subject: [PATCH] mm: Warn on memory allocation in non-preemptible context on
|
||||
RT
|
||||
|
||||
The memory allocation via kmalloc(, GFP_ATOMIC) in atomic context
|
||||
(disabled preemption or interrupts) is not allowed on RT because the
|
||||
buddy allocator is using sleeping locks which can't be acquired in this
|
||||
context.
|
||||
Such an allocation may not trigger a warning in the buddy allocator
|
||||
if it is always satisfied in the SLUB allocator.
|
||||
|
||||
Add a warning on RT if a memory allocation was attempted in a
non-preemptible region.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
|
||||
---
|
||||
mm/slub.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
--- a/mm/slub.c
|
||||
+++ b/mm/slub.c
|
||||
@@ -2687,6 +2687,9 @@ static __always_inline void *slab_alloc_
|
||||
struct page *page;
|
||||
unsigned long tid;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
|
||||
+ WARN_ON_ONCE(!preemptible() && system_state >= SYSTEM_SCHEDULING);
|
||||
+
|
||||
s = slab_pre_alloc_hook(s, gfpflags);
|
||||
if (!s)
|
||||
return NULL;
|
||||
@@ -3148,6 +3151,9 @@ int kmem_cache_alloc_bulk(struct kmem_ca
|
||||
struct kmem_cache_cpu *c;
|
||||
int i;
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
|
||||
+ WARN_ON_ONCE(!preemptible() && system_state >= SYSTEM_SCHEDULING);
|
||||
+
|
||||
/* memcg and kmem_cache debug support */
|
||||
s = slab_pre_alloc_hook(s, flags);
|
||||
if (unlikely(!s))
|
@@ -0,0 +1,156 @@
|
||||
Subject: timekeeping: Split jiffies seqlock
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Thu, 14 Feb 2013 22:36:59 +0100
|
||||
|
||||
Replace jiffies_lock seqlock with a simple seqcounter and a rawlock so
|
||||
it can be taken in atomic context on RT.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/time/jiffies.c | 7 ++++---
|
||||
kernel/time/tick-common.c | 10 ++++++----
|
||||
kernel/time/tick-sched.c | 19 ++++++++++++-------
|
||||
kernel/time/timekeeping.c | 6 ++++--
|
||||
kernel/time/timekeeping.h | 3 ++-
|
||||
5 files changed, 28 insertions(+), 17 deletions(-)
|
||||
|
||||
--- a/kernel/time/jiffies.c
|
||||
+++ b/kernel/time/jiffies.c
|
||||
@@ -58,7 +58,8 @@ static struct clocksource clocksource_ji
|
||||
.max_cycles = 10,
|
||||
};
|
||||
|
||||
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
|
||||
+__cacheline_aligned_in_smp seqcount_t jiffies_seq;
|
||||
|
||||
#if (BITS_PER_LONG < 64)
|
||||
u64 get_jiffies_64(void)
|
||||
@@ -67,9 +68,9 @@ u64 get_jiffies_64(void)
|
||||
u64 ret;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
ret = jiffies_64;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(get_jiffies_64);
|
||||
--- a/kernel/time/tick-common.c
|
||||
+++ b/kernel/time/tick-common.c
|
||||
@@ -83,13 +83,15 @@ int tick_is_oneshot_available(void)
|
||||
static void tick_periodic(int cpu)
|
||||
{
|
||||
if (tick_do_timer_cpu == cpu) {
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
/* Keep track of the next tick event */
|
||||
tick_next_period = ktime_add(tick_next_period, tick_period);
|
||||
|
||||
do_timer(1);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -161,9 +163,9 @@ void tick_setup_periodic(struct clock_ev
|
||||
ktime_t next;
|
||||
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
next = tick_next_period;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
|
||||
clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
|
||||
|
||||
--- a/kernel/time/tick-sched.c
|
||||
+++ b/kernel/time/tick-sched.c
|
||||
@@ -65,7 +65,8 @@ static void tick_do_update_jiffies64(kti
|
||||
return;
|
||||
|
||||
/* Reevaluate with jiffies_lock held */
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
|
||||
delta = ktime_sub(now, last_jiffies_update);
|
||||
if (delta >= tick_period) {
|
||||
@@ -91,10 +92,12 @@ static void tick_do_update_jiffies64(kti
|
||||
/* Keep the tick_next_period variable up to date */
|
||||
tick_next_period = ktime_add(last_jiffies_update, tick_period);
|
||||
} else {
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return;
|
||||
}
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
|
||||
@@ -105,12 +108,14 @@ static ktime_t tick_init_jiffy_update(vo
|
||||
{
|
||||
ktime_t period;
|
||||
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
/* Did we start the jiffies update yet ? */
|
||||
if (last_jiffies_update == 0)
|
||||
last_jiffies_update = tick_next_period;
|
||||
period = last_jiffies_update;
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
return period;
|
||||
}
|
||||
|
||||
@@ -665,10 +670,10 @@ static ktime_t tick_nohz_next_event(stru
|
||||
|
||||
/* Read jiffies and the time when jiffies were updated last */
|
||||
do {
|
||||
- seq = read_seqbegin(&jiffies_lock);
|
||||
+ seq = read_seqcount_begin(&jiffies_seq);
|
||||
basemono = last_jiffies_update;
|
||||
basejiff = jiffies;
|
||||
- } while (read_seqretry(&jiffies_lock, seq));
|
||||
+ } while (read_seqcount_retry(&jiffies_seq, seq));
|
||||
ts->last_jiffies = basejiff;
|
||||
ts->timer_expires_base = basemono;
|
||||
|
||||
--- a/kernel/time/timekeeping.c
|
||||
+++ b/kernel/time/timekeeping.c
|
||||
@@ -2397,8 +2397,10 @@ EXPORT_SYMBOL(hardpps);
|
||||
*/
|
||||
void xtime_update(unsigned long ticks)
|
||||
{
|
||||
- write_seqlock(&jiffies_lock);
|
||||
+ raw_spin_lock(&jiffies_lock);
|
||||
+ write_seqcount_begin(&jiffies_seq);
|
||||
do_timer(ticks);
|
||||
- write_sequnlock(&jiffies_lock);
|
||||
+ write_seqcount_end(&jiffies_seq);
|
||||
+ raw_spin_unlock(&jiffies_lock);
|
||||
update_wall_time();
|
||||
}
|
||||
--- a/kernel/time/timekeeping.h
|
||||
+++ b/kernel/time/timekeeping.h
|
||||
@@ -25,7 +25,8 @@ static inline void sched_clock_resume(vo
|
||||
extern void do_timer(unsigned long ticks);
|
||||
extern void update_wall_time(void);
|
||||
|
||||
-extern seqlock_t jiffies_lock;
|
||||
+extern raw_spinlock_t jiffies_lock;
|
||||
+extern seqcount_t jiffies_seq;
|
||||
|
||||
#define CS_NAME_LEN 32
|
||||
|
@@ -0,0 +1,32 @@
|
||||
Subject: signal: Revert ptrace preempt magic
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 21 Sep 2011 19:57:12 +0200
|
||||
|
||||
Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more
|
||||
than a bandaid around the ptrace design trainwreck. It's not a
|
||||
correctness issue, it's merely a cosmetic bandaid.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/signal.c | 8 --------
|
||||
1 file changed, 8 deletions(-)
|
||||
|
||||
--- a/kernel/signal.c
|
||||
+++ b/kernel/signal.c
|
||||
@@ -2202,16 +2202,8 @@ static void ptrace_stop(int exit_code, i
|
||||
if (gstop_done && ptrace_reparented(current))
|
||||
do_notify_parent_cldstop(current, false, why);
|
||||
|
||||
- /*
|
||||
- * Don't want to allow preemption here, because
|
||||
- * sys_ptrace() needs this task to be inactive.
|
||||
- *
|
||||
- * XXX: implement read_unlock_no_resched().
|
||||
- */
|
||||
- preempt_disable();
|
||||
read_unlock(&tasklist_lock);
|
||||
cgroup_enter_frozen();
|
||||
- preempt_enable_no_resched();
|
||||
freezable_schedule();
|
||||
cgroup_leave_frozen(true);
|
||||
} else {
|
@@ -0,0 +1,261 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 14 Aug 2019 16:38:43 +0200
|
||||
Subject: [PATCH] dma-buf: Use seqlock_t instead of disabling preemption
|
||||
|
||||
"dma reservation" disables preemption while acquiring the write access
|
||||
for "seqcount".
|
||||
|
||||
Replace the seqcount with a seqlock_t which provides seqcount-like
semantics and a lock for the writer.
|
||||
|
||||
Link: https://lkml.kernel.org/r/f410b429-db86-f81c-7c67-f563fa808b62@free.fr
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/dma-buf/dma-buf.c | 8 ++--
|
||||
drivers/dma-buf/dma-resv.c | 45 ++++++++---------------
|
||||
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +--
|
||||
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 6 +--
|
||||
include/linux/dma-resv.h | 4 +-
|
||||
5 files changed, 27 insertions(+), 42 deletions(-)
|
||||
|
||||
--- a/drivers/dma-buf/dma-buf.c
|
||||
+++ b/drivers/dma-buf/dma-buf.c
|
||||
@@ -215,7 +215,7 @@ static __poll_t dma_buf_poll(struct file
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
- seq = read_seqcount_begin(&resv->seq);
|
||||
+ seq = read_seqbegin(&resv->seq);
|
||||
rcu_read_lock();
|
||||
|
||||
fobj = rcu_dereference(resv->fence);
|
||||
@@ -224,7 +224,7 @@ static __poll_t dma_buf_poll(struct file
|
||||
else
|
||||
shared_count = 0;
|
||||
fence_excl = rcu_dereference(resv->fence_excl);
|
||||
- if (read_seqcount_retry(&resv->seq, seq)) {
|
||||
+ if (read_seqretry(&resv->seq, seq)) {
|
||||
rcu_read_unlock();
|
||||
goto retry;
|
||||
}
|
||||
@@ -1190,12 +1190,12 @@ static int dma_buf_debug_show(struct seq
|
||||
|
||||
robj = buf_obj->resv;
|
||||
while (true) {
|
||||
- seq = read_seqcount_begin(&robj->seq);
|
||||
+ seq = read_seqbegin(&robj->seq);
|
||||
rcu_read_lock();
|
||||
fobj = rcu_dereference(robj->fence);
|
||||
shared_count = fobj ? fobj->shared_count : 0;
|
||||
fence = rcu_dereference(robj->fence_excl);
|
||||
- if (!read_seqcount_retry(&robj->seq, seq))
|
||||
+ if (!read_seqretry(&robj->seq, seq))
|
||||
break;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
--- a/drivers/dma-buf/dma-resv.c
|
||||
+++ b/drivers/dma-buf/dma-resv.c
|
||||
@@ -49,12 +49,6 @@
|
||||
DEFINE_WD_CLASS(reservation_ww_class);
|
||||
EXPORT_SYMBOL(reservation_ww_class);
|
||||
|
||||
-struct lock_class_key reservation_seqcount_class;
|
||||
-EXPORT_SYMBOL(reservation_seqcount_class);
|
||||
-
|
||||
-const char reservation_seqcount_string[] = "reservation_seqcount";
|
||||
-EXPORT_SYMBOL(reservation_seqcount_string);
|
||||
-
|
||||
/**
|
||||
* dma_resv_list_alloc - allocate fence list
|
||||
* @shared_max: number of fences we need space for
|
||||
@@ -103,8 +97,7 @@ void dma_resv_init(struct dma_resv *obj)
|
||||
{
|
||||
ww_mutex_init(&obj->lock, &reservation_ww_class);
|
||||
|
||||
- __seqcount_init(&obj->seq, reservation_seqcount_string,
|
||||
- &reservation_seqcount_class);
|
||||
+ seqlock_init(&obj->seq);
|
||||
RCU_INIT_POINTER(obj->fence, NULL);
|
||||
RCU_INIT_POINTER(obj->fence_excl, NULL);
|
||||
}
|
||||
@@ -234,8 +227,7 @@ void dma_resv_add_shared_fence(struct dm
|
||||
fobj = dma_resv_get_list(obj);
|
||||
count = fobj->shared_count;
|
||||
|
||||
- preempt_disable();
|
||||
- write_seqcount_begin(&obj->seq);
|
||||
+ write_seqlock(&obj->seq);
|
||||
|
||||
for (i = 0; i < count; ++i) {
|
||||
|
||||
@@ -255,8 +247,7 @@ void dma_resv_add_shared_fence(struct dm
|
||||
/* pointer update must be visible before we extend the shared_count */
|
||||
smp_store_mb(fobj->shared_count, count);
|
||||
|
||||
- write_seqcount_end(&obj->seq);
|
||||
- preempt_enable();
|
||||
+ write_sequnlock(&obj->seq);
|
||||
dma_fence_put(old);
|
||||
}
|
||||
EXPORT_SYMBOL(dma_resv_add_shared_fence);
|
||||
@@ -283,14 +274,12 @@ void dma_resv_add_excl_fence(struct dma_
|
||||
if (fence)
|
||||
dma_fence_get(fence);
|
||||
|
||||
- preempt_disable();
|
||||
- write_seqcount_begin(&obj->seq);
|
||||
- /* write_seqcount_begin provides the necessary memory barrier */
|
||||
+ write_seqlock(&obj->seq);
|
||||
+ /* write_seqlock provides the necessary memory barrier */
|
||||
RCU_INIT_POINTER(obj->fence_excl, fence);
|
||||
if (old)
|
||||
old->shared_count = 0;
|
||||
- write_seqcount_end(&obj->seq);
|
||||
- preempt_enable();
|
||||
+ write_sequnlock(&obj->seq);
|
||||
|
||||
/* inplace update, no shared fences */
|
||||
while (i--)
|
||||
@@ -368,13 +357,11 @@ int dma_resv_copy_fences(struct dma_resv
|
||||
src_list = dma_resv_get_list(dst);
|
||||
old = dma_resv_get_excl(dst);
|
||||
|
||||
- preempt_disable();
|
||||
- write_seqcount_begin(&dst->seq);
|
||||
- /* write_seqcount_begin provides the necessary memory barrier */
|
||||
+ write_seqlock(&dst->seq);
|
||||
+ /* write_seqlock provides the necessary memory barrier */
|
||||
RCU_INIT_POINTER(dst->fence_excl, new);
|
||||
RCU_INIT_POINTER(dst->fence, dst_list);
|
||||
- write_seqcount_end(&dst->seq);
|
||||
- preempt_enable();
|
||||
+ write_sequnlock(&dst->seq);
|
||||
|
||||
dma_resv_list_free(src_list);
|
||||
dma_fence_put(old);
|
||||
@@ -414,7 +401,7 @@ int dma_resv_get_fences_rcu(struct dma_r
|
||||
shared_count = i = 0;
|
||||
|
||||
rcu_read_lock();
|
||||
- seq = read_seqcount_begin(&obj->seq);
|
||||
+ seq = read_seqbegin(&obj->seq);
|
||||
|
||||
fence_excl = rcu_dereference(obj->fence_excl);
|
||||
if (fence_excl && !dma_fence_get_rcu(fence_excl))
|
||||
@@ -456,7 +443,7 @@ int dma_resv_get_fences_rcu(struct dma_r
|
||||
}
|
||||
}
|
||||
|
||||
- if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
|
||||
+ if (i != shared_count || read_seqretry(&obj->seq, seq)) {
|
||||
while (i--)
|
||||
dma_fence_put(shared[i]);
|
||||
dma_fence_put(fence_excl);
|
||||
@@ -507,7 +494,7 @@ long dma_resv_wait_timeout_rcu(struct dm
|
||||
|
||||
retry:
|
||||
shared_count = 0;
|
||||
- seq = read_seqcount_begin(&obj->seq);
|
||||
+ seq = read_seqbegin(&obj->seq);
|
||||
rcu_read_lock();
|
||||
i = -1;
|
||||
|
||||
@@ -553,7 +540,7 @@ long dma_resv_wait_timeout_rcu(struct dm
|
||||
|
||||
rcu_read_unlock();
|
||||
if (fence) {
|
||||
- if (read_seqcount_retry(&obj->seq, seq)) {
|
||||
+ if (read_seqretry(&obj->seq, seq)) {
|
||||
dma_fence_put(fence);
|
||||
goto retry;
|
||||
}
|
||||
@@ -607,7 +594,7 @@ bool dma_resv_test_signaled_rcu(struct d
|
||||
retry:
|
||||
ret = true;
|
||||
shared_count = 0;
|
||||
- seq = read_seqcount_begin(&obj->seq);
|
||||
+ seq = read_seqbegin(&obj->seq);
|
||||
|
||||
if (test_all) {
|
||||
unsigned i;
|
||||
@@ -627,7 +614,7 @@ bool dma_resv_test_signaled_rcu(struct d
|
||||
break;
|
||||
}
|
||||
|
||||
- if (read_seqcount_retry(&obj->seq, seq))
|
||||
+ if (read_seqretry(&obj->seq, seq))
|
||||
goto retry;
|
||||
}
|
||||
|
||||
@@ -639,7 +626,7 @@ bool dma_resv_test_signaled_rcu(struct d
|
||||
if (ret < 0)
|
||||
goto retry;
|
||||
|
||||
- if (read_seqcount_retry(&obj->seq, seq))
|
||||
+ if (read_seqretry(&obj->seq, seq))
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
|
||||
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
|
||||
@@ -252,11 +252,9 @@ static int amdgpu_amdkfd_remove_eviction
|
||||
new->shared_count = k;
|
||||
|
||||
/* Install the new fence list, seqcount provides the barriers */
|
||||
- preempt_disable();
|
||||
- write_seqcount_begin(&resv->seq);
|
||||
+ write_seqlock(&resv->seq);
|
||||
RCU_INIT_POINTER(resv->fence, new);
|
||||
- write_seqcount_end(&resv->seq);
|
||||
- preempt_enable();
|
||||
+ write_sequnlock(&resv->seq);
|
||||
|
||||
/* Drop the references to the removed fences or move them to ef_list */
|
||||
for (i = j, k = 0; i < old->shared_count; ++i) {
|
||||
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
|
||||
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
|
||||
@@ -75,7 +75,6 @@ busy_check_writer(const struct dma_fence
|
||||
|
||||
return __busy_set_if_active(fence, __busy_write_id);
|
||||
}
|
||||
-
|
||||
int
|
||||
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
|
||||
struct drm_file *file)
|
||||
@@ -110,7 +109,8 @@ i915_gem_busy_ioctl(struct drm_device *d
|
||||
*
|
||||
*/
|
||||
retry:
|
||||
- seq = raw_read_seqcount(&obj->base.resv->seq);
|
||||
+ /* XXX raw_read_seqcount() does not wait for the WRITE to finish */
|
||||
+ seq = read_seqbegin(&obj->base.resv->seq);
|
||||
|
||||
/* Translate the exclusive fence to the READ *and* WRITE engine */
|
||||
args->busy =
|
||||
@@ -129,7 +129,7 @@ i915_gem_busy_ioctl(struct drm_device *d
|
||||
}
|
||||
}
|
||||
|
||||
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
|
||||
+ if (args->busy && read_seqretry(&obj->base.resv->seq, seq))
|
||||
goto retry;
|
||||
|
||||
err = 0;
|
||||
--- a/include/linux/dma-resv.h
|
||||
+++ b/include/linux/dma-resv.h
|
||||
@@ -65,13 +65,13 @@ struct dma_resv_list {
|
||||
/**
|
||||
* struct dma_resv - a reservation object manages fences for a buffer
|
||||
* @lock: update side lock
|
||||
- * @seq: sequence count for managing RCU read-side synchronization
|
||||
+ * @seq: sequence lock for managing RCU read-side synchronization
|
||||
* @fence_excl: the exclusive fence, if there is one currently
|
||||
* @fence: list of current shared fences
|
||||
*/
|
||||
struct dma_resv {
|
||||
struct ww_mutex lock;
|
||||
- seqcount_t seq;
|
||||
+ seqlock_t seq;
|
||||
|
||||
struct dma_fence __rcu *fence_excl;
|
||||
struct dma_resv_list __rcu *fence;
|
188
kernel/patches-5.4.x-rt/0089-seqlock-prevent-rt-starvation.patch
Normal file
@@ -0,0 +1,188 @@
|
||||
Subject: seqlock: Prevent rt starvation
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 22 Feb 2012 12:03:30 +0100
|
||||
|
||||
If a low prio writer gets preempted while holding the seqlock write
|
||||
locked, a high prio reader spins forever on RT.
|
||||
|
||||
To prevent this let the reader grab the spinlock, so it blocks and
|
||||
eventually boosts the writer. This way the writer can proceed and
|
||||
endless spinning is prevented.
|
||||
|
||||
For seqcount writers we disable preemption over the update code
|
||||
path. Thanks to Al Viro for disentangling some VFS code to make that
|
||||
possible.
|
||||
|
||||
Nicholas Mc Guire:
|
||||
- spin_lock+unlock => spin_unlock_wait
|
||||
- __write_seqcount_begin => __raw_write_seqcount_begin
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
|
||||
---
|
||||
include/linux/seqlock.h | 57 +++++++++++++++++++++++++++++++++++++-----------
|
||||
include/net/neighbour.h | 6 ++---
|
||||
2 files changed, 48 insertions(+), 15 deletions(-)
|
||||
|
||||
--- a/include/linux/seqlock.h
|
||||
+++ b/include/linux/seqlock.h
|
||||
@@ -221,20 +221,30 @@ static inline int read_seqcount_retry(co
|
||||
return __read_seqcount_retry(s, start);
|
||||
}
|
||||
|
||||
-
|
||||
-
|
||||
-static inline void raw_write_seqcount_begin(seqcount_t *s)
|
||||
+static inline void __raw_write_seqcount_begin(seqcount_t *s)
|
||||
{
|
||||
s->sequence++;
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
-static inline void raw_write_seqcount_end(seqcount_t *s)
|
||||
+static inline void raw_write_seqcount_begin(seqcount_t *s)
|
||||
+{
|
||||
+ preempt_disable_rt();
|
||||
+ __raw_write_seqcount_begin(s);
|
||||
+}
|
||||
+
|
||||
+static inline void __raw_write_seqcount_end(seqcount_t *s)
|
||||
{
|
||||
smp_wmb();
|
||||
s->sequence++;
|
||||
}
|
||||
|
||||
+static inline void raw_write_seqcount_end(seqcount_t *s)
|
||||
+{
|
||||
+ __raw_write_seqcount_end(s);
|
||||
+ preempt_enable_rt();
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* raw_write_seqcount_barrier - do a seq write barrier
|
||||
* @s: pointer to seqcount_t
|
||||
@@ -428,10 +438,33 @@ typedef struct {
|
||||
/*
|
||||
* Read side functions for starting and finalizing a read side section.
|
||||
*/
|
||||
+#ifndef CONFIG_PREEMPT_RT
|
||||
static inline unsigned read_seqbegin(const seqlock_t *sl)
|
||||
{
|
||||
return read_seqcount_begin(&sl->seqcount);
|
||||
}
|
||||
+#else
|
||||
+/*
|
||||
+ * Starvation safe read side for RT
|
||||
+ */
|
||||
+static inline unsigned read_seqbegin(seqlock_t *sl)
|
||||
+{
|
||||
+ unsigned ret;
|
||||
+
|
||||
+repeat:
|
||||
+ ret = READ_ONCE(sl->seqcount.sequence);
|
||||
+ if (unlikely(ret & 1)) {
|
||||
+ /*
|
||||
+ * Take the lock and let the writer proceed (i.e. evtl
|
||||
+ * boost it), otherwise we could loop here forever.
|
||||
+ */
|
||||
+ spin_unlock_wait(&sl->lock);
|
||||
+ goto repeat;
|
||||
+ }
|
||||
+ smp_rmb();
|
||||
+ return ret;
|
||||
+}
|
||||
+#endif
|
||||
|
||||
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
|
||||
{
|
||||
@@ -446,36 +479,36 @@ static inline unsigned read_seqretry(con
|
||||
static inline void write_seqlock(seqlock_t *sl)
|
||||
{
|
||||
spin_lock(&sl->lock);
|
||||
- write_seqcount_begin(&sl->seqcount);
|
||||
+ __raw_write_seqcount_begin(&sl->seqcount);
|
||||
}
|
||||
|
||||
static inline void write_sequnlock(seqlock_t *sl)
|
||||
{
|
||||
- write_seqcount_end(&sl->seqcount);
|
||||
+ __raw_write_seqcount_end(&sl->seqcount);
|
||||
spin_unlock(&sl->lock);
|
||||
}
|
||||
|
||||
static inline void write_seqlock_bh(seqlock_t *sl)
|
||||
{
|
||||
spin_lock_bh(&sl->lock);
|
||||
- write_seqcount_begin(&sl->seqcount);
|
||||
+ __raw_write_seqcount_begin(&sl->seqcount);
|
||||
}
|
||||
|
||||
static inline void write_sequnlock_bh(seqlock_t *sl)
|
||||
{
|
||||
- write_seqcount_end(&sl->seqcount);
|
||||
+ __raw_write_seqcount_end(&sl->seqcount);
|
||||
spin_unlock_bh(&sl->lock);
|
||||
}
|
||||
|
||||
static inline void write_seqlock_irq(seqlock_t *sl)
|
||||
{
|
||||
spin_lock_irq(&sl->lock);
|
||||
- write_seqcount_begin(&sl->seqcount);
|
||||
+ __raw_write_seqcount_begin(&sl->seqcount);
|
||||
}
|
||||
|
||||
static inline void write_sequnlock_irq(seqlock_t *sl)
|
||||
{
|
||||
- write_seqcount_end(&sl->seqcount);
|
||||
+ __raw_write_seqcount_end(&sl->seqcount);
|
||||
spin_unlock_irq(&sl->lock);
|
||||
}
|
||||
|
||||
@@ -484,7 +517,7 @@ static inline unsigned long __write_seql
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&sl->lock, flags);
|
||||
- write_seqcount_begin(&sl->seqcount);
|
||||
+ __raw_write_seqcount_begin(&sl->seqcount);
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -494,7 +527,7 @@ static inline unsigned long __write_seql
|
||||
static inline void
|
||||
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
|
||||
{
|
||||
- write_seqcount_end(&sl->seqcount);
|
||||
+ __raw_write_seqcount_end(&sl->seqcount);
|
||||
spin_unlock_irqrestore(&sl->lock, flags);
|
||||
}
|
||||
|
||||
--- a/include/net/neighbour.h
|
||||
+++ b/include/net/neighbour.h
|
||||
@@ -459,7 +459,7 @@ static inline int neigh_hh_bridge(struct
|
||||
}
|
||||
#endif
|
||||
|
||||
-static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
|
||||
+static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
|
||||
{
|
||||
unsigned int hh_alen = 0;
|
||||
unsigned int seq;
|
||||
@@ -502,7 +502,7 @@ static inline int neigh_hh_output(const
|
||||
static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
|
||||
bool skip_cache)
|
||||
{
|
||||
- const struct hh_cache *hh = &n->hh;
|
||||
+ struct hh_cache *hh = &n->hh;
|
||||
|
||||
if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
|
||||
return neigh_hh_output(hh, skb);
|
||||
@@ -543,7 +543,7 @@ struct neighbour_cb {
|
||||
|
||||
#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
|
||||
|
||||
-static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n,
|
||||
+static inline void neigh_ha_snapshot(char *dst, struct neighbour *n,
|
||||
const struct net_device *dev)
|
||||
{
|
||||
unsigned int seq;
|
@@ -0,0 +1,128 @@
|
||||
Date: Fri, 28 Oct 2016 23:05:11 +0200
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
To: Trond Myklebust <trond.myklebust@primarydata.com>
|
||||
Cc: Anna Schumaker <anna.schumaker@netapp.com>,
|
||||
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
|
||||
tglx@linutronix.de
|
||||
Subject: NFSv4: replace seqcount_t with a seqlock_t
|
||||
|
||||
The raw_write_seqcount_begin() in nfs4_reclaim_open_state() causes a
|
||||
preempt_disable() on -RT. The spin_lock()/spin_unlock() in that section does
|
||||
not work.
|
||||
The lockdep part was removed in commit
|
||||
abbec2da13f0 ("NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state")
|
||||
because lockdep complained.
|
||||
The whole seqcount thing was introduced in commit
|
||||
c137afabe330 ("NFSv4: Allow the state manager to mark an open_owner as being recovered").
|
||||
The recovery thread runs only once.
write_seqlock() does not work on !RT because it disables preemption while the
writer side has to remain preemptible (despite the fact that it will
block readers).
|
||||
|
||||
Reported-by: kernel test robot <xiaolong.ye@intel.com>
|
||||
Link: https://lkml.kernel.org/r/20161021164727.24485-1-bigeasy@linutronix.de
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
fs/nfs/delegation.c | 4 ++--
|
||||
fs/nfs/nfs4_fs.h | 2 +-
|
||||
fs/nfs/nfs4proc.c | 4 ++--
|
||||
fs/nfs/nfs4state.c | 22 ++++++++++++++++------
|
||||
4 files changed, 21 insertions(+), 11 deletions(-)
|
||||
|
||||
--- a/fs/nfs/delegation.c
|
||||
+++ b/fs/nfs/delegation.c
|
||||
@@ -162,11 +162,11 @@ static int nfs_delegation_claim_opens(st
|
||||
sp = state->owner;
|
||||
/* Block nfs4_proc_unlck */
|
||||
mutex_lock(&sp->so_delegreturn_mutex);
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = read_seqbegin(&sp->so_reclaim_seqlock);
|
||||
err = nfs4_open_delegation_recall(ctx, state, stateid);
|
||||
if (!err)
|
||||
err = nfs_delegation_claim_locks(state, stateid);
|
||||
- if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
err = -EAGAIN;
|
||||
mutex_unlock(&sp->so_delegreturn_mutex);
|
||||
put_nfs_open_context(ctx);
|
||||
--- a/fs/nfs/nfs4_fs.h
|
||||
+++ b/fs/nfs/nfs4_fs.h
|
||||
@@ -115,7 +115,7 @@ struct nfs4_state_owner {
|
||||
unsigned long so_flags;
|
||||
struct list_head so_states;
|
||||
struct nfs_seqid_counter so_seqid;
|
||||
- seqcount_t so_reclaim_seqcount;
|
||||
+ seqlock_t so_reclaim_seqlock;
|
||||
struct mutex so_delegreturn_mutex;
|
||||
};
|
||||
|
||||
--- a/fs/nfs/nfs4proc.c
|
||||
+++ b/fs/nfs/nfs4proc.c
|
||||
@@ -2967,7 +2967,7 @@ static int _nfs4_open_and_get_state(stru
|
||||
unsigned int seq;
|
||||
int ret;
|
||||
|
||||
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
+ seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
dir_verifier = nfs_save_change_attribute(dir);
|
||||
|
||||
ret = _nfs4_proc_open(opendata, ctx);
|
||||
@@ -3021,7 +3021,7 @@ static int _nfs4_open_and_get_state(stru
|
||||
|
||||
if (d_inode(dentry) == state->inode) {
|
||||
nfs_inode_attach_open_context(ctx);
|
||||
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
|
||||
+ if (read_seqretry(&sp->so_reclaim_seqlock, seq))
|
||||
nfs4_schedule_stateid_recovery(server, state);
|
||||
}
|
||||
|
||||
--- a/fs/nfs/nfs4state.c
|
||||
+++ b/fs/nfs/nfs4state.c
|
||||
@@ -508,7 +508,7 @@ nfs4_alloc_state_owner(struct nfs_server
|
||||
nfs4_init_seqid_counter(&sp->so_seqid);
|
||||
atomic_set(&sp->so_count, 1);
|
||||
INIT_LIST_HEAD(&sp->so_lru);
|
||||
- seqcount_init(&sp->so_reclaim_seqcount);
|
||||
+ seqlock_init(&sp->so_reclaim_seqlock);
|
||||
mutex_init(&sp->so_delegreturn_mutex);
|
||||
return sp;
|
||||
}
|
||||
@@ -1616,8 +1616,12 @@ static int nfs4_reclaim_open_state(struc
|
||||
* recovering after a network partition or a reboot from a
|
||||
* server that doesn't support a grace period.
|
||||
*/
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ write_seqlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
|
||||
restart:
|
||||
list_for_each_entry(state, &sp->so_states, open_states) {
|
||||
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
|
||||
@@ -1678,14 +1682,20 @@ static int nfs4_reclaim_open_state(struc
|
||||
spin_lock(&sp->so_lock);
|
||||
goto restart;
|
||||
}
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return 0;
|
||||
out_err:
|
||||
nfs4_put_open_state(state);
|
||||
- spin_lock(&sp->so_lock);
|
||||
- raw_write_seqcount_end(&sp->so_reclaim_seqcount);
|
||||
- spin_unlock(&sp->so_lock);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ write_sequnlock(&sp->so_reclaim_seqlock);
|
||||
+#else
|
||||
+ write_seqcount_end(&sp->so_reclaim_seqlock.seqcount);
|
||||
+#endif
|
||||
return status;
|
||||
}
|
||||
|
@@ -0,0 +1,298 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 14 Sep 2016 17:36:35 +0200
|
||||
Subject: [PATCH] net/Qdisc: use a seqlock instead of a seqcount
|
||||
|
||||
The seqcount disables preemption on -RT while it is held, which we can't
|
||||
remove. Also we don't want the reader to spin for ages if the writer is
|
||||
scheduled out. The seqlock on the other hand will serialize / sleep on
|
||||
the lock while the writer is active.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/seqlock.h | 9 +++++++++
|
||||
include/net/gen_stats.h | 11 ++++++-----
|
||||
include/net/net_seq_lock.h | 15 +++++++++++++++
|
||||
include/net/sch_generic.h | 19 +++++++++++++++++--
|
||||
net/core/gen_estimator.c | 6 +++---
|
||||
net/core/gen_stats.c | 12 ++++++------
|
||||
net/sched/sch_api.c | 2 +-
|
||||
net/sched/sch_generic.c | 13 +++++++++++++
|
||||
8 files changed, 70 insertions(+), 17 deletions(-)
|
||||
create mode 100644 include/net/net_seq_lock.h
|
||||
|
||||
--- a/include/linux/seqlock.h
|
||||
+++ b/include/linux/seqlock.h
|
||||
@@ -482,6 +482,15 @@ static inline void write_seqlock(seqlock
|
||||
__raw_write_seqcount_begin(&sl->seqcount);
|
||||
}
|
||||
|
||||
+static inline int try_write_seqlock(seqlock_t *sl)
|
||||
+{
|
||||
+ if (spin_trylock(&sl->lock)) {
|
||||
+ __raw_write_seqcount_begin(&sl->seqcount);
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static inline void write_sequnlock(seqlock_t *sl)
|
||||
{
|
||||
__raw_write_seqcount_end(&sl->seqcount);
|
||||
--- a/include/net/gen_stats.h
|
||||
+++ b/include/net/gen_stats.h
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <linux/socket.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/pkt_sched.h>
|
||||
+#include <net/net_seq_lock.h>
|
||||
|
||||
struct gnet_stats_basic_cpu {
|
||||
struct gnet_stats_basic_packed bstats;
|
||||
@@ -36,15 +37,15 @@ int gnet_stats_start_copy_compat(struct
|
||||
spinlock_t *lock, struct gnet_dump *d,
|
||||
int padattr);
|
||||
|
||||
-int gnet_stats_copy_basic(const seqcount_t *running,
|
||||
+int gnet_stats_copy_basic(net_seqlock_t *running,
|
||||
struct gnet_dump *d,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b);
|
||||
-void __gnet_stats_copy_basic(const seqcount_t *running,
|
||||
+void __gnet_stats_copy_basic(net_seqlock_t *running,
|
||||
struct gnet_stats_basic_packed *bstats,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b);
|
||||
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
|
||||
+int gnet_stats_copy_basic_hw(net_seqlock_t *running,
|
||||
struct gnet_dump *d,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b);
|
||||
@@ -64,13 +65,13 @@ int gen_new_estimator(struct gnet_stats_
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **rate_est,
|
||||
spinlock_t *lock,
|
||||
- seqcount_t *running, struct nlattr *opt);
|
||||
+ net_seqlock_t *running, struct nlattr *opt);
|
||||
void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
|
||||
int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **ptr,
|
||||
spinlock_t *lock,
|
||||
- seqcount_t *running, struct nlattr *opt);
|
||||
+ net_seqlock_t *running, struct nlattr *opt);
|
||||
bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
|
||||
bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
|
||||
struct gnet_stats_rate_est64 *sample);
|
||||
--- /dev/null
|
||||
+++ b/include/net/net_seq_lock.h
|
||||
@@ -0,0 +1,15 @@
|
||||
+#ifndef __NET_NET_SEQ_LOCK_H__
|
||||
+#define __NET_NET_SEQ_LOCK_H__
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+# define net_seqlock_t seqlock_t
|
||||
+# define net_seq_begin(__r) read_seqbegin(__r)
|
||||
+# define net_seq_retry(__r, __s) read_seqretry(__r, __s)
|
||||
+
|
||||
+#else
|
||||
+# define net_seqlock_t seqcount_t
|
||||
+# define net_seq_begin(__r) read_seqcount_begin(__r)
|
||||
+# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s)
|
||||
+#endif
|
||||
+
|
||||
+#endif
|
||||
--- a/include/net/sch_generic.h
|
||||
+++ b/include/net/sch_generic.h
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/dynamic_queue_limits.h>
|
||||
#include <linux/list.h>
|
||||
+#include <net/net_seq_lock.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/mutex.h>
|
||||
@@ -100,7 +101,7 @@ struct Qdisc {
|
||||
struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
|
||||
struct qdisc_skb_head q;
|
||||
struct gnet_stats_basic_packed bstats;
|
||||
- seqcount_t running;
|
||||
+ net_seqlock_t running;
|
||||
struct gnet_stats_queue qstats;
|
||||
unsigned long state;
|
||||
struct Qdisc *next_sched;
|
||||
@@ -138,7 +139,11 @@ static inline bool qdisc_is_running(stru
|
||||
{
|
||||
if (qdisc->flags & TCQ_F_NOLOCK)
|
||||
return spin_is_locked(&qdisc->seqlock);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ return spin_is_locked(&qdisc->running.lock) ? true : false;
|
||||
+#else
|
||||
return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
|
||||
@@ -162,17 +167,27 @@ static inline bool qdisc_run_begin(struc
|
||||
} else if (qdisc_is_running(qdisc)) {
|
||||
return false;
|
||||
}
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ if (try_write_seqlock(&qdisc->running))
|
||||
+ return true;
|
||||
+ return false;
|
||||
+#else
|
||||
/* Variant of write_seqcount_begin() telling lockdep a trylock
|
||||
* was attempted.
|
||||
*/
|
||||
raw_write_seqcount_begin(&qdisc->running);
|
||||
seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
|
||||
return true;
|
||||
+#endif
|
||||
}
|
||||
|
||||
static inline void qdisc_run_end(struct Qdisc *qdisc)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ write_sequnlock(&qdisc->running);
|
||||
+#else
|
||||
write_seqcount_end(&qdisc->running);
|
||||
+#endif
|
||||
if (qdisc->flags & TCQ_F_NOLOCK)
|
||||
spin_unlock(&qdisc->seqlock);
|
||||
}
|
||||
@@ -541,7 +556,7 @@ static inline spinlock_t *qdisc_root_sle
|
||||
return qdisc_lock(root);
|
||||
}
|
||||
|
||||
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
|
||||
+static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
|
||||
{
|
||||
struct Qdisc *root = qdisc_root_sleeping(qdisc);
|
||||
|
||||
--- a/net/core/gen_estimator.c
|
||||
+++ b/net/core/gen_estimator.c
|
||||
@@ -42,7 +42,7 @@
|
||||
struct net_rate_estimator {
|
||||
struct gnet_stats_basic_packed *bstats;
|
||||
spinlock_t *stats_lock;
|
||||
- seqcount_t *running;
|
||||
+ net_seqlock_t *running;
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats;
|
||||
u8 ewma_log;
|
||||
u8 intvl_log; /* period : (250ms << intvl_log) */
|
||||
@@ -125,7 +125,7 @@ int gen_new_estimator(struct gnet_stats_
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **rate_est,
|
||||
spinlock_t *lock,
|
||||
- seqcount_t *running,
|
||||
+ net_seqlock_t *running,
|
||||
struct nlattr *opt)
|
||||
{
|
||||
struct gnet_estimator *parm = nla_data(opt);
|
||||
@@ -223,7 +223,7 @@ int gen_replace_estimator(struct gnet_st
|
||||
struct gnet_stats_basic_cpu __percpu *cpu_bstats,
|
||||
struct net_rate_estimator __rcu **rate_est,
|
||||
spinlock_t *lock,
|
||||
- seqcount_t *running, struct nlattr *opt)
|
||||
+ net_seqlock_t *running, struct nlattr *opt)
|
||||
{
|
||||
return gen_new_estimator(bstats, cpu_bstats, rate_est,
|
||||
lock, running, opt);
|
||||
--- a/net/core/gen_stats.c
|
||||
+++ b/net/core/gen_stats.c
|
||||
@@ -138,7 +138,7 @@ static void
|
||||
}
|
||||
|
||||
void
|
||||
-__gnet_stats_copy_basic(const seqcount_t *running,
|
||||
+__gnet_stats_copy_basic(net_seqlock_t *running,
|
||||
struct gnet_stats_basic_packed *bstats,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b)
|
||||
@@ -151,15 +151,15 @@ void
|
||||
}
|
||||
do {
|
||||
if (running)
|
||||
- seq = read_seqcount_begin(running);
|
||||
+ seq = net_seq_begin(running);
|
||||
bstats->bytes = b->bytes;
|
||||
bstats->packets = b->packets;
|
||||
- } while (running && read_seqcount_retry(running, seq));
|
||||
+ } while (running && net_seq_retry(running, seq));
|
||||
}
|
||||
EXPORT_SYMBOL(__gnet_stats_copy_basic);
|
||||
|
||||
static int
|
||||
-___gnet_stats_copy_basic(const seqcount_t *running,
|
||||
+___gnet_stats_copy_basic(net_seqlock_t *running,
|
||||
struct gnet_dump *d,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b,
|
||||
@@ -200,7 +200,7 @@ static int
|
||||
* if the room in the socket buffer was not sufficient.
|
||||
*/
|
||||
int
|
||||
-gnet_stats_copy_basic(const seqcount_t *running,
|
||||
+gnet_stats_copy_basic(net_seqlock_t *running,
|
||||
struct gnet_dump *d,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b)
|
||||
@@ -224,7 +224,7 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
|
||||
* if the room in the socket buffer was not sufficient.
|
||||
*/
|
||||
int
|
||||
-gnet_stats_copy_basic_hw(const seqcount_t *running,
|
||||
+gnet_stats_copy_basic_hw(net_seqlock_t *running,
|
||||
struct gnet_dump *d,
|
||||
struct gnet_stats_basic_cpu __percpu *cpu,
|
||||
struct gnet_stats_basic_packed *b)
|
||||
--- a/net/sched/sch_api.c
|
||||
+++ b/net/sched/sch_api.c
|
||||
@@ -1248,7 +1248,7 @@ static struct Qdisc *qdisc_create(struct
|
||||
rcu_assign_pointer(sch->stab, stab);
|
||||
}
|
||||
if (tca[TCA_RATE]) {
|
||||
- seqcount_t *running;
|
||||
+ net_seqlock_t *running;
|
||||
|
||||
err = -EOPNOTSUPP;
|
||||
if (sch->flags & TCQ_F_MQROOT) {
|
||||
--- a/net/sched/sch_generic.c
|
||||
+++ b/net/sched/sch_generic.c
|
||||
@@ -557,7 +557,11 @@ struct Qdisc noop_qdisc = {
|
||||
.ops = &noop_qdisc_ops,
|
||||
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
|
||||
.dev_queue = &noop_netdev_queue,
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ .running = __SEQLOCK_UNLOCKED(noop_qdisc.running),
|
||||
+#else
|
||||
.running = SEQCNT_ZERO(noop_qdisc.running),
|
||||
+#endif
|
||||
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
|
||||
.gso_skb = {
|
||||
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
|
||||
@@ -853,7 +857,11 @@ struct Qdisc *qdisc_alloc(struct netdev_
|
||||
spin_lock_init(&sch->busylock);
|
||||
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
|
||||
spin_lock_init(&sch->seqlock);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ seqlock_init(&sch->running);
|
||||
+#else
|
||||
seqcount_init(&sch->running);
|
||||
+#endif
|
||||
|
||||
sch->ops = ops;
|
||||
sch->flags = ops->static_flags;
|
||||
@@ -867,7 +875,12 @@ struct Qdisc *qdisc_alloc(struct netdev_
|
||||
if (sch != &noop_qdisc) {
|
||||
lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
|
||||
lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
|
||||
+#ifdef CONFIG_PREEMPT_RT
|
||||
+ lockdep_set_class(&sch->running.seqcount, &dev->qdisc_running_key);
|
||||
+ lockdep_set_class(&sch->running.lock, &dev->qdisc_running_key);
|
||||
+#else
|
||||
lockdep_set_class(&sch->running, &dev->qdisc_running_key);
|
||||
+#endif
|
||||
}
|
||||
|
||||
return sch;
|
@@ -0,0 +1,106 @@
|
||||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 20 Mar 2013 18:06:20 +0100
|
||||
Subject: net: Add a mutex around devnet_rename_seq
|
||||
|
||||
On RT write_seqcount_begin() disables preemption and device_rename()
|
||||
allocates memory with GFP_KERNEL and later grabs the sysfs_mutex.
Serialize with a mutex and use the non-preemption-disabling
__write_seqcount_begin().
|
||||
|
||||
To avoid writer starvation, let the reader grab the mutex and release
|
||||
it when it detects a writer in progress. This keeps the normal case
|
||||
(no reader on the fly) fast.
|
||||
|
||||
[ tglx: Instead of replacing the seqcount by a mutex, add the mutex ]
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
net/core/dev.c | 34 ++++++++++++++++++++--------------
|
||||
1 file changed, 20 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/net/core/dev.c
|
||||
+++ b/net/core/dev.c
|
||||
@@ -195,6 +195,7 @@ static unsigned int napi_gen_id = NR_CPU
|
||||
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
|
||||
|
||||
static seqcount_t devnet_rename_seq;
|
||||
+static DEFINE_MUTEX(devnet_rename_mutex);
|
||||
|
||||
static inline void dev_base_seq_inc(struct net *net)
|
||||
{
|
||||
@@ -838,7 +839,8 @@ int netdev_get_name(struct net *net, cha
|
||||
strcpy(name, dev->name);
|
||||
rcu_read_unlock();
|
||||
if (read_seqcount_retry(&devnet_rename_seq, seq)) {
|
||||
- cond_resched();
|
||||
+ mutex_lock(&devnet_rename_mutex);
|
||||
+ mutex_unlock(&devnet_rename_mutex);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
@@ -1115,20 +1117,17 @@ int dev_change_name(struct net_device *d
|
||||
likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
|
||||
return -EBUSY;
|
||||
|
||||
- write_seqcount_begin(&devnet_rename_seq);
|
||||
+ mutex_lock(&devnet_rename_mutex);
|
||||
+ __raw_write_seqcount_begin(&devnet_rename_seq);
|
||||
|
||||
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
|
||||
- write_seqcount_end(&devnet_rename_seq);
|
||||
- return 0;
|
||||
- }
|
||||
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
|
||||
+ goto outunlock;
|
||||
|
||||
memcpy(oldname, dev->name, IFNAMSIZ);
|
||||
|
||||
err = dev_get_valid_name(net, dev, newname);
|
||||
- if (err < 0) {
|
||||
- write_seqcount_end(&devnet_rename_seq);
|
||||
- return err;
|
||||
- }
|
||||
+ if (err < 0)
|
||||
+ goto outunlock;
|
||||
|
||||
if (oldname[0] && !strchr(oldname, '%'))
|
||||
netdev_info(dev, "renamed from %s\n", oldname);
|
||||
@@ -1141,11 +1140,12 @@ int dev_change_name(struct net_device *d
|
||||
if (ret) {
|
||||
memcpy(dev->name, oldname, IFNAMSIZ);
|
||||
dev->name_assign_type = old_assign_type;
|
||||
- write_seqcount_end(&devnet_rename_seq);
|
||||
- return ret;
|
||||
+ err = ret;
|
||||
+ goto outunlock;
|
||||
}
|
||||
|
||||
- write_seqcount_end(&devnet_rename_seq);
|
||||
+ __raw_write_seqcount_end(&devnet_rename_seq);
|
||||
+ mutex_unlock(&devnet_rename_mutex);
|
||||
|
||||
netdev_adjacent_rename_links(dev, oldname);
|
||||
|
||||
@@ -1166,7 +1166,8 @@ int dev_change_name(struct net_device *d
|
||||
/* err >= 0 after dev_alloc_name() or stores the first errno */
|
||||
if (err >= 0) {
|
||||
err = ret;
|
||||
- write_seqcount_begin(&devnet_rename_seq);
|
||||
+ mutex_lock(&devnet_rename_mutex);
|
||||
+ __raw_write_seqcount_begin(&devnet_rename_seq);
|
||||
memcpy(dev->name, oldname, IFNAMSIZ);
|
||||
memcpy(oldname, newname, IFNAMSIZ);
|
||||
dev->name_assign_type = old_assign_type;
|
||||
@@ -1179,6 +1180,11 @@ int dev_change_name(struct net_device *d
|
||||
}
|
||||
|
||||
return err;
|
||||
+
|
||||
+outunlock:
|
||||
+ __raw_write_seqcount_end(&devnet_rename_seq);
|
||||
+ mutex_unlock(&devnet_rename_mutex);
|
||||
+ return err;
|
||||
}
|
||||
|
||||
/**
|
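The locking scheme in the patch above generalizes: the writer serializes on a mutex (sleepable on RT) and bumps a raw seqcount without disabling preemption, while a reader that observes a concurrent writer briefly takes and drops the mutex before retrying so it cannot starve the writer. A minimal sketch of that pattern with hypothetical names, using the mainline raw_write_seqcount_*() helpers in place of the RT series' __raw_write_seqcount_*():

#include <linux/mutex.h>
#include <linux/seqlock.h>
#include <linux/string.h>

static seqcount_t demo_name_seq;       /* sequence count for lockless readers */
static DEFINE_MUTEX(demo_name_mutex);  /* serializes writers, sleepable on RT */
static char demo_name[16];

/* Writer: serialize on the mutex, bump the seqcount without disabling preemption. */
static void demo_set_name(const char *new)
{
	mutex_lock(&demo_name_mutex);
	raw_write_seqcount_begin(&demo_name_seq);
	strscpy(demo_name, new, sizeof(demo_name));
	raw_write_seqcount_end(&demo_name_seq);
	mutex_unlock(&demo_name_mutex);
}

/* Reader: retry on a concurrent write; taking/dropping the mutex avoids starving the writer.
 * buf must be at least sizeof(demo_name) bytes. */
static void demo_get_name(char *buf)
{
	unsigned int seq;

retry:
	seq = read_seqcount_begin(&demo_name_seq);
	strscpy(buf, demo_name, sizeof(demo_name));
	if (read_seqcount_retry(&demo_name_seq, seq)) {
		mutex_lock(&demo_name_mutex);
		mutex_unlock(&demo_name_mutex);
		goto retry;
	}
}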
@ -0,0 +1,70 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 18 Dec 2019 12:25:09 +0100
Subject: [PATCH] userfaultfd: Use a seqlock instead of seqcount

On RT write_seqcount_begin() disables preemption which leads to warning
in add_wait_queue() while the spinlock_t is acquired.
The waitqueue can't be converted to swait_queue because
userfaultfd_wake_function() is used as a custom wake function.

Use seqlock instead seqcount to avoid the preempt_disable() section
during add_wait_queue().

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/userfaultfd.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -61,7 +61,7 @@ struct userfaultfd_ctx {
/* waitqueue head for events */
wait_queue_head_t event_wqh;
/* a refile sequence protected by fault_pending_wqh lock */
- struct seqcount refile_seq;
+ seqlock_t refile_seq;
/* pseudo fd refcounting */
refcount_t refcount;
/* userfaultfd syscall flags */
@@ -1063,7 +1063,7 @@ static ssize_t userfaultfd_ctx_read(stru
* waitqueue could become empty if this is the
* only userfault.
*/
- write_seqcount_begin(&ctx->refile_seq);
+ write_seqlock(&ctx->refile_seq);

/*
* The fault_pending_wqh.lock prevents the uwq
@@ -1089,7 +1089,7 @@ static ssize_t userfaultfd_ctx_read(stru
list_del(&uwq->wq.entry);
add_wait_queue(&ctx->fault_wqh, &uwq->wq);

- write_seqcount_end(&ctx->refile_seq);
+ write_sequnlock(&ctx->refile_seq);

/* careful to always initialize msg if ret == 0 */
*msg = uwq->msg;
@@ -1262,11 +1262,11 @@ static __always_inline void wake_userfau
* sure we've userfaults to wake.
*/
do {
- seq = read_seqcount_begin(&ctx->refile_seq);
+ seq = read_seqbegin(&ctx->refile_seq);
need_wakeup = waitqueue_active(&ctx->fault_pending_wqh) ||
waitqueue_active(&ctx->fault_wqh);
cond_resched();
- } while (read_seqcount_retry(&ctx->refile_seq, seq));
+ } while (read_seqretry(&ctx->refile_seq, seq));
if (need_wakeup)
__wake_userfault(ctx, range);
}
@@ -1939,7 +1939,7 @@ static void init_once_userfaultfd_ctx(vo
init_waitqueue_head(&ctx->fault_wqh);
init_waitqueue_head(&ctx->event_wqh);
init_waitqueue_head(&ctx->fd_wqh);
- seqcount_init(&ctx->refile_seq);
+ seqlock_init(&ctx->refile_seq);
}

SYSCALL_DEFINE1(userfaultfd, int, flags)
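For context, seqlock_t bundles the seqcount with its own spinlock, so write_seqlock() serializes writers through a lock (a sleeping lock on PREEMPT_RT) instead of relying on preemption being disabled, while readers stay lockless and retry. A small illustrative sketch of the same reader/writer pattern (names are made up, not taken from fs/userfaultfd.c):

#include <linux/seqlock.h>
#include <linux/types.h>

struct demo_stats {
	seqlock_t lock;   /* protects the two counters below */
	u64 packets;
	u64 bytes;
};

static void demo_stats_init(struct demo_stats *s)
{
	seqlock_init(&s->lock);   /* like seqlock_init(&ctx->refile_seq) in the patch */
	s->packets = 0;
	s->bytes = 0;
}

/* Writer: the embedded lock serializes writers; on RT it is a sleeping lock. */
static void demo_stats_add(struct demo_stats *s, u64 len)
{
	write_seqlock(&s->lock);
	s->packets++;
	s->bytes += len;
	write_sequnlock(&s->lock);
}

/* Reader: lockless, retries if a writer was active during the read. */
static u64 demo_stats_bytes(struct demo_stats *s)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqbegin(&s->lock);
		val = s->bytes;
	} while (read_seqretry(&s->lock, seq));

	return val;
}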
@ -0,0 +1,138 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 15 Sep 2016 10:51:27 +0200
Subject: [PATCH] fs/nfs: turn rmdir_sem into a semaphore

The RW semaphore had a reader side which used the _non_owner version
because it most likely took the reader lock in one thread and released it
in another which would cause lockdep to complain if the "regular"
version was used.
On -RT we need the owner because the rw lock is turned into a rtmutex.
The semaphores on the hand are "plain simple" and should work as
expected. We can't have multiple readers but on -RT we don't allow
multiple readers anyway so that is not a loss.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/nfs/dir.c | 8 ++++++++
fs/nfs/inode.c | 4 ++++
fs/nfs/unlink.c | 31 +++++++++++++++++++++++++++----
include/linux/nfs_fs.h | 4 ++++
4 files changed, 43 insertions(+), 4 deletions(-)

--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1846,7 +1846,11 @@ int nfs_rmdir(struct inode *dir, struct

trace_nfs_rmdir_enter(dir, dentry);
if (d_really_is_positive(dentry)) {
+#ifdef CONFIG_PREEMPT_RT
+ down(&NFS_I(d_inode(dentry))->rmdir_sem);
+#else
down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
+#endif
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
/* Ensure the VFS deletes this inode */
switch (error) {
@@ -1856,7 +1860,11 @@ int nfs_rmdir(struct inode *dir, struct
case -ENOENT:
nfs_dentry_handle_enoent(dentry);
}
+#ifdef CONFIG_PREEMPT_RT
+ up(&NFS_I(d_inode(dentry))->rmdir_sem);
+#else
up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
+#endif
} else
error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
trace_nfs_rmdir_exit(dir, dentry, error);
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2105,7 +2105,11 @@ static void init_once(void *foo)
atomic_long_set(&nfsi->nrequests, 0);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
+#ifdef CONFIG_PREEMPT_RT
+ sema_init(&nfsi->rmdir_sem, 1);
+#else
init_rwsem(&nfsi->rmdir_sem);
+#endif
mutex_init(&nfsi->commit_mutex);
nfs4_init_once(nfsi);
}
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -53,6 +53,29 @@ static void nfs_async_unlink_done(struct
rpc_restart_call_prepare(task);
}

+#ifdef CONFIG_PREEMPT_RT
+static void nfs_down_anon(struct semaphore *sema)
+{
+ down(sema);
+}
+
+static void nfs_up_anon(struct semaphore *sema)
+{
+ up(sema);
+}
+
+#else
+static void nfs_down_anon(struct rw_semaphore *rwsem)
+{
+ down_read_non_owner(rwsem);
+}
+
+static void nfs_up_anon(struct rw_semaphore *rwsem)
+{
+ up_read_non_owner(rwsem);
+}
+#endif
+
/**
* nfs_async_unlink_release - Release the sillydelete data.
* @calldata: struct nfs_unlinkdata to release
@@ -66,7 +89,7 @@ static void nfs_async_unlink_release(voi
struct dentry *dentry = data->dentry;
struct super_block *sb = dentry->d_sb;

- up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
+ nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
d_lookup_done(dentry);
nfs_free_unlinkdata(data);
dput(dentry);
@@ -119,10 +142,10 @@ static int nfs_call_unlink(struct dentry
struct inode *dir = d_inode(dentry->d_parent);
struct dentry *alias;

- down_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_down_anon(&NFS_I(dir)->rmdir_sem);
alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
if (IS_ERR(alias)) {
- up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_up_anon(&NFS_I(dir)->rmdir_sem);
return 0;
}
if (!d_in_lookup(alias)) {
@@ -144,7 +167,7 @@ static int nfs_call_unlink(struct dentry
ret = 0;
spin_unlock(&alias->d_lock);
dput(alias);
- up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+ nfs_up_anon(&NFS_I(dir)->rmdir_sem);
/*
* If we'd displaced old cached devname, free it. At that
* point dentry is definitely not a root, so we won't need
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -165,7 +165,11 @@ struct nfs_inode {

/* Readers: in-flight sillydelete RPC calls */
/* Writers: rmdir */
+#ifdef CONFIG_PREEMPT_RT
+ struct semaphore rmdir_sem;
+#else
struct rw_semaphore rmdir_sem;
+#endif
struct mutex commit_mutex;

#if IS_ENABLED(CONFIG_NFS_V4)
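The nfs_down_anon()/nfs_up_anon() helpers above boil down to one wrapper type that is a plain semaphore on PREEMPT_RT (no owner tracking, so acquire and release may happen in different tasks, but also no reader concurrency) and an rw_semaphore with the _non_owner primitives otherwise. The same shape in isolation, with hypothetical names:

#include <linux/rwsem.h>
#include <linux/semaphore.h>

#ifdef CONFIG_PREEMPT_RT
/* On RT an rw_semaphore is rtmutex-based and needs an owner, so fall back to a
 * binary semaphore: no owner, but no reader concurrency either. */
struct demo_anon_lock {
	struct semaphore sem;
};

static inline void demo_anon_lock_init(struct demo_anon_lock *l)
{
	sema_init(&l->sem, 1);
}

static inline void demo_anon_lock_acquire(struct demo_anon_lock *l)
{
	down(&l->sem);
}

static inline void demo_anon_lock_release(struct demo_anon_lock *l)
{
	up(&l->sem);
}
#else
/* !RT: keep the rw_semaphore with the _non_owner helpers, because the lock may be
 * released by a different task (e.g. an async completion) than the one that took it. */
struct demo_anon_lock {
	struct rw_semaphore rwsem;
};

static inline void demo_anon_lock_init(struct demo_anon_lock *l)
{
	init_rwsem(&l->rwsem);
}

static inline void demo_anon_lock_acquire(struct demo_anon_lock *l)
{
	down_read_non_owner(&l->rwsem);
}

static inline void demo_anon_lock_release(struct demo_anon_lock *l)
{
	up_read_non_owner(&l->rwsem);
}
#endif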
@ -0,0 +1,87 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Oct 2017 11:29:53 +0200
Subject: [PATCH] fs/dcache: disable preemption on i_dir_seq's write side

i_dir_seq is an opencoded seqcounter. Based on the code it looks like we
could have two writers in parallel despite the fact that the d_lock is
held. The problem is that during the write process on RT the preemption
is still enabled and if this process is interrupted by a reader with RT
priority then we lock up.
To avoid that lock up I am disabling the preemption during the update.
The rename of i_dir_seq is here to ensure to catch new write sides in
future.

Cc: stable-rt@vger.kernel.org
Reported-by: Oleg.Karfich@wago.com
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/dcache.c | 12 +++++++-----
fs/inode.c | 2 +-
include/linux/fs.h | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2482,9 +2482,10 @@ EXPORT_SYMBOL(d_rehash);
static inline unsigned start_dir_add(struct inode *dir)
{

+ preempt_disable_rt();
for (;;) {
- unsigned n = dir->i_dir_seq;
- if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
+ unsigned n = dir->__i_dir_seq;
+ if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n)
return n;
cpu_relax();
}
@@ -2492,7 +2493,8 @@ static inline unsigned start_dir_add(str

static inline void end_dir_add(struct inode *dir, unsigned n)
{
- smp_store_release(&dir->i_dir_seq, n + 2);
+ smp_store_release(&dir->__i_dir_seq, n + 2);
+ preempt_enable_rt();
}

static void d_wait_lookup(struct dentry *dentry)
@@ -2525,7 +2527,7 @@ struct dentry *d_alloc_parallel(struct d

retry:
rcu_read_lock();
- seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
+ seq = smp_load_acquire(&parent->d_inode->__i_dir_seq);
r_seq = read_seqbegin(&rename_lock);
dentry = __d_lookup_rcu(parent, name, &d_seq);
if (unlikely(dentry)) {
@@ -2553,7 +2555,7 @@ struct dentry *d_alloc_parallel(struct d
}

hlist_bl_lock(b);
- if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
+ if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) {
hlist_bl_unlock(b);
rcu_read_unlock();
goto retry;
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -157,7 +157,7 @@ int inode_init_always(struct super_block
inode->i_bdev = NULL;
inode->i_cdev = NULL;
inode->i_link = NULL;
- inode->i_dir_seq = 0;
+ inode->__i_dir_seq = 0;
inode->i_rdev = 0;
inode->dirtied_when = 0;

--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -717,7 +717,7 @@ struct inode {
struct block_device *i_bdev;
struct cdev *i_cdev;
char *i_link;
- unsigned i_dir_seq;
+ unsigned __i_dir_seq;
};

__u32 i_generation;
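__i_dir_seq is an open-coded seqcount: the writer flips it to an odd value with cmpxchg() and publishes the even value with smp_store_release(), while readers sample it with smp_load_acquire() and compare later. A stripped-down sketch of that pattern; it uses plain preempt_disable()/preempt_enable() where the RT series uses its preempt_disable_rt() helpers (which compile away on !RT), and all names are illustrative:

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/preempt.h>
#include <linux/processor.h>
#include <linux/types.h>

/* Open-coded seqcount: even = no writer, odd = write in progress. */
static unsigned int demo_seq;

static unsigned int demo_write_begin(void)
{
	unsigned int n;

	preempt_disable();	/* keep an RT reader from preempting the writer mid-update */
	for (;;) {
		n = demo_seq;
		if (!(n & 1) && cmpxchg(&demo_seq, n, n + 1) == n)
			return n;
		cpu_relax();
	}
}

static void demo_write_end(unsigned int n)
{
	smp_store_release(&demo_seq, n + 2);	/* back to even, publish the update */
	preempt_enable();
}

/* Reader side: sample the counter, do the lookup, then check for staleness. */
static unsigned int demo_read_begin(void)
{
	return smp_load_acquire(&demo_seq);
}

static bool demo_read_is_stale(unsigned int seq)
{
	return READ_ONCE(demo_seq) != seq;
}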
@ -0,0 +1,118 @@
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Fri, 21 Jun 2013 15:07:25 -0400
Subject: list_bl: Make list head locking RT safe

As per changes in include/linux/jbd_common.h for avoiding the
bit_spin_locks on RT ("fs: jbd/jbd2: Make state lock and journal
head lock rt safe") we do the same thing here.

We use the non atomic __set_bit and __clear_bit inside the scope of
the lock to preserve the ability of the existing LIST_DEBUG code to
use the zero'th bit in the sanity checks.

As a bit spinlock, we had no lockdep visibility into the usage
of the list head locking. Now, if we were to implement it as a
standard non-raw spinlock, we would see:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
in_atomic(): 1, irqs_disabled(): 0, pid: 122, name: udevd
5 locks held by udevd/122:
#0: (&sb->s_type->i_mutex_key#7/1){+.+.+.}, at: [<ffffffff811967e8>] lock_rename+0xe8/0xf0
#1: (rename_lock){+.+...}, at: [<ffffffff811a277c>] d_move+0x2c/0x60
#2: (&dentry->d_lock){+.+...}, at: [<ffffffff811a0763>] dentry_lock_for_move+0xf3/0x130
#3: (&dentry->d_lock/2){+.+...}, at: [<ffffffff811a0734>] dentry_lock_for_move+0xc4/0x130
#4: (&dentry->d_lock/3){+.+...}, at: [<ffffffff811a0747>] dentry_lock_for_move+0xd7/0x130
Pid: 122, comm: udevd Not tainted 3.4.47-rt62 #7
Call Trace:
[<ffffffff810b9624>] __might_sleep+0x134/0x1f0
[<ffffffff817a24d4>] rt_spin_lock+0x24/0x60
[<ffffffff811a0c4c>] __d_shrink+0x5c/0xa0
[<ffffffff811a1b2d>] __d_drop+0x1d/0x40
[<ffffffff811a24be>] __d_move+0x8e/0x320
[<ffffffff811a278e>] d_move+0x3e/0x60
[<ffffffff81199598>] vfs_rename+0x198/0x4c0
[<ffffffff8119b093>] sys_renameat+0x213/0x240
[<ffffffff817a2de5>] ? _raw_spin_unlock+0x35/0x60
[<ffffffff8107781c>] ? do_page_fault+0x1ec/0x4b0
[<ffffffff817a32ca>] ? retint_swapgs+0xe/0x13
[<ffffffff813eb0e6>] ? trace_hardirqs_on_thunk+0x3a/0x3f
[<ffffffff8119b0db>] sys_rename+0x1b/0x20
[<ffffffff817a3b96>] system_call_fastpath+0x1a/0x1f

Since we are only taking the lock during short lived list operations,
lets assume for now that it being raw won't be a significant latency
concern.


Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
[julia@ni.com: Use #define instead static inline to avoid false positive from
lockdep]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/list_bl.h | 30 ++++++++++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)

--- a/include/linux/list_bl.h
+++ b/include/linux/list_bl.h
@@ -3,6 +3,7 @@
#define _LINUX_LIST_BL_H

#include <linux/list.h>
+#include <linux/spinlock.h>
#include <linux/bit_spinlock.h>

/*
@@ -33,13 +34,24 @@

struct hlist_bl_head {
struct hlist_bl_node *first;
+#ifdef CONFIG_PREEMPT_RT
+ raw_spinlock_t lock;
+#endif
};

struct hlist_bl_node {
struct hlist_bl_node *next, **pprev;
};
-#define INIT_HLIST_BL_HEAD(ptr) \
- ((ptr)->first = NULL)
+
+#ifdef CONFIG_PREEMPT_RT
+#define INIT_HLIST_BL_HEAD(h) \
+do { \
+ (h)->first = NULL; \
+ raw_spin_lock_init(&(h)->lock); \
+} while (0)
+#else
+#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL
+#endif

static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h)
{
@@ -145,12 +157,26 @@ static inline void hlist_bl_del_init(str

static inline void hlist_bl_lock(struct hlist_bl_head *b)
{
+#ifndef CONFIG_PREEMPT_RT
bit_spin_lock(0, (unsigned long *)b);
+#else
+ raw_spin_lock(&b->lock);
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ __set_bit(0, (unsigned long *)b);
+#endif
+#endif
}

static inline void hlist_bl_unlock(struct hlist_bl_head *b)
{
+#ifndef CONFIG_PREEMPT_RT
__bit_spin_unlock(0, (unsigned long *)b);
+#else
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+ __clear_bit(0, (unsigned long *)b);
+#endif
+ raw_spin_unlock(&b->lock);
+#endif
}

static inline bool hlist_bl_is_locked(struct hlist_bl_head *b)
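Callers of the list_bl API are unaffected by this change: hlist_bl_lock()/hlist_bl_unlock() still guard the list head, and bit 0 is still set while the lock is held so the LIST_DEBUG sanity checks keep working. A usage sketch with a hypothetical entry type and names, identical on RT and !RT:

#include <linux/errno.h>
#include <linux/list_bl.h>
#include <linux/slab.h>

struct demo_entry {
	struct hlist_bl_node node;
	int key;
};

static struct hlist_bl_head demo_head;

static void demo_init(void)
{
	INIT_HLIST_BL_HEAD(&demo_head);	/* also inits the raw spinlock on PREEMPT_RT */
}

static int demo_insert(int key)
{
	struct demo_entry *e = kmalloc(sizeof(*e), GFP_KERNEL);

	if (!e)
		return -ENOMEM;
	e->key = key;

	hlist_bl_lock(&demo_head);	/* bit spinlock, or raw spinlock plus bit 0 on RT */
	hlist_bl_add_head(&e->node, &demo_head);
	hlist_bl_unlock(&demo_head);
	return 0;
}

static struct demo_entry *demo_find(int key)
{
	struct demo_entry *e;
	struct hlist_bl_node *pos;

	hlist_bl_lock(&demo_head);
	hlist_bl_for_each_entry(e, pos, &demo_head, node) {
		if (e->key == key) {
			hlist_bl_unlock(&demo_head);
			return e;
		}
	}
	hlist_bl_unlock(&demo_head);
	return NULL;
}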
@ -0,0 +1,53 @@
From: Clark Williams <williams@redhat.com>
Date: Tue, 3 Jul 2018 13:34:30 -0500
Subject: [PATCH] fscache: initialize cookie hash table raw spinlocks

The fscache cookie mechanism uses a hash table of hlist_bl_head structures. The
PREEMPT_RT patcheset adds a raw spinlock to this structure and so on PREEMPT_RT
the structures get used uninitialized, causing warnings about bad magic numbers
when spinlock debugging is turned on.

Use the init function for fscache cookies.

Signed-off-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
fs/fscache/cookie.c | 8 ++++++++
fs/fscache/main.c | 1 +
include/linux/fscache.h | 1 +
3 files changed, 10 insertions(+)

--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -958,3 +958,11 @@ int __fscache_check_consistency(struct f
return -ESTALE;
}
EXPORT_SYMBOL(__fscache_check_consistency);
+
+void __init fscache_cookie_init(void)
+{
+ int i;
+
+ for (i = 0; i < (1 << fscache_cookie_hash_shift) - 1; i++)
+ INIT_HLIST_BL_HEAD(&fscache_cookie_hash[i]);
+}
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -145,6 +145,7 @@ static int __init fscache_init(void)
ret = -ENOMEM;
goto error_cookie_jar;
}
+ fscache_cookie_init();

fscache_root = kobject_create_and_add("fscache", kernel_kobj);
if (!fscache_root)
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -226,6 +226,7 @@ extern void __fscache_readpages_cancel(s
extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
bool (*)(void *), void *);
+extern void fscache_cookie_init(void);

/**
* fscache_register_netfs - Register a filesystem as desiring caching services
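This fix is a direct consequence of the list_bl change above: once hlist_bl_head may carry a raw spinlock, a hash table of such heads has to be initialized explicitly instead of relying on zeroed static memory. The generic shape of such a boot-time init loop, with a hypothetical table (this sketch simply walks every bucket):

#include <linux/init.h>
#include <linux/list_bl.h>

#define DEMO_HASH_SHIFT	5	/* hypothetical: 32 buckets */

static struct hlist_bl_head demo_hash[1 << DEMO_HASH_SHIFT];

/* Call once before the table is used, e.g. from the subsystem's init function. */
static void __init demo_hash_init(void)
{
	int i;

	for (i = 0; i < (1 << DEMO_HASH_SHIFT); i++)
		INIT_HLIST_BL_HEAD(&demo_hash[i]);	/* raw_spin_lock_init() happens here on RT */
}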
Some files were not shown because too many files have changed in this diff.