mirror of
https://github.com/linuxkit/linuxkit.git
synced 2025-07-20 09:39:08 +00:00
442 lines
12 KiB
Diff
442 lines
12 KiB
Diff
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
|
Date: Thu, 12 Oct 2017 17:34:38 +0200
|
|
Subject: [PATCH 21/22] locking/rtmutex: add ww_mutex addon for mutex-rt
|
|
|
|
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
|
---
|
|
include/linux/mutex.h | 8 -
|
|
include/linux/ww_mutex.h | 8 +
|
|
kernel/locking/rtmutex.c | 262 ++++++++++++++++++++++++++++++++++++++--
|
|
kernel/locking/rtmutex_common.h | 2
|
|
kernel/locking/rwsem-rt.c | 2
|
|
5 files changed, 262 insertions(+), 20 deletions(-)
|
|
|
|
--- a/include/linux/mutex.h
|
|
+++ b/include/linux/mutex.h
|
|
@@ -82,14 +82,6 @@ struct mutex {
|
|
struct ww_class;
|
|
struct ww_acquire_ctx;
|
|
|
|
-struct ww_mutex {
|
|
- struct mutex base;
|
|
- struct ww_acquire_ctx *ctx;
|
|
-#ifdef CONFIG_DEBUG_MUTEXES
|
|
- struct ww_class *ww_class;
|
|
-#endif
|
|
-};
|
|
-
|
|
/*
|
|
* This is the control structure for tasks blocked on mutex,
|
|
* which resides on the blocked task's kernel stack:
|
|
--- a/include/linux/ww_mutex.h
|
|
+++ b/include/linux/ww_mutex.h
|
|
@@ -28,6 +28,14 @@ struct ww_class {
|
|
unsigned int is_wait_die;
|
|
};
|
|
|
|
+struct ww_mutex {
|
|
+ struct mutex base;
|
|
+ struct ww_acquire_ctx *ctx;
|
|
+#ifdef CONFIG_DEBUG_MUTEXES
|
|
+ struct ww_class *ww_class;
|
|
+#endif
|
|
+};
|
|
+
|
|
struct ww_acquire_ctx {
|
|
struct task_struct *task;
|
|
unsigned long stamp;
|
|
--- a/kernel/locking/rtmutex.c
|
|
+++ b/kernel/locking/rtmutex.c
|
|
@@ -24,6 +24,7 @@
|
|
#include <linux/sched/wake_q.h>
|
|
#include <linux/sched/debug.h>
|
|
#include <linux/timer.h>
|
|
+#include <linux/ww_mutex.h>
|
|
|
|
#include "rtmutex_common.h"
|
|
|
|
@@ -1234,6 +1235,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
|
|
|
|
#endif /* PREEMPT_RT */
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+ static inline int __sched
|
|
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
|
|
+{
|
|
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
|
|
+ struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
|
|
+
|
|
+ if (!hold_ctx)
|
|
+ return 0;
|
|
+
|
|
+ if (unlikely(ctx == hold_ctx))
|
|
+ return -EALREADY;
|
|
+
|
|
+ if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
|
|
+ (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
|
|
+#ifdef CONFIG_DEBUG_MUTEXES
|
|
+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
|
|
+ ctx->contending_lock = ww;
|
|
+#endif
|
|
+ return -EDEADLK;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#else
|
|
+ static inline int __sched
|
|
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
|
|
+{
|
|
+ BUG();
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
static inline int
|
|
try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
|
|
struct rt_mutex_waiter *waiter)
|
|
@@ -1512,7 +1547,8 @@ void rt_mutex_init_waiter(struct rt_mute
|
|
static int __sched
|
|
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
|
struct hrtimer_sleeper *timeout,
|
|
- struct rt_mutex_waiter *waiter)
|
|
+ struct rt_mutex_waiter *waiter,
|
|
+ struct ww_acquire_ctx *ww_ctx)
|
|
{
|
|
int ret = 0;
|
|
|
|
@@ -1530,6 +1566,12 @@ static int __sched
|
|
break;
|
|
}
|
|
|
|
+ if (ww_ctx && ww_ctx->acquired > 0) {
|
|
+ ret = __mutex_lock_check_stamp(lock, ww_ctx);
|
|
+ if (ret)
|
|
+ break;
|
|
+ }
|
|
+
|
|
raw_spin_unlock_irq(&lock->wait_lock);
|
|
|
|
schedule();
|
|
@@ -1558,16 +1600,106 @@ static void rt_mutex_handle_deadlock(int
|
|
}
|
|
}
|
|
|
|
+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
|
|
+ struct ww_acquire_ctx *ww_ctx)
|
|
+{
|
|
+#ifdef CONFIG_DEBUG_MUTEXES
|
|
+ /*
|
|
+ * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
|
|
+ * but released with a normal mutex_unlock in this call.
|
|
+ *
|
|
+ * This should never happen, always use ww_mutex_unlock.
|
|
+ */
|
|
+ DEBUG_LOCKS_WARN_ON(ww->ctx);
|
|
+
|
|
+ /*
|
|
+ * Not quite done after calling ww_acquire_done() ?
|
|
+ */
|
|
+ DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
|
|
+
|
|
+ if (ww_ctx->contending_lock) {
|
|
+ /*
|
|
+ * After -EDEADLK you tried to
|
|
+ * acquire a different ww_mutex? Bad!
|
|
+ */
|
|
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
|
|
+
|
|
+ /*
|
|
+ * You called ww_mutex_lock after receiving -EDEADLK,
|
|
+ * but 'forgot' to unlock everything else first?
|
|
+ */
|
|
+ DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
|
|
+ ww_ctx->contending_lock = NULL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Naughty, using a different class will lead to undefined behavior!
|
|
+ */
|
|
+ DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
|
|
+#endif
|
|
+ ww_ctx->acquired++;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+static void ww_mutex_account_lock(struct rt_mutex *lock,
|
|
+ struct ww_acquire_ctx *ww_ctx)
|
|
+{
|
|
+ struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
|
|
+ struct rt_mutex_waiter *waiter, *n;
|
|
+
|
|
+ /*
|
|
+ * Account the acquisition in ww_ctx and record ww_ctx as the
|
|
+ * context that now holds this ww_mutex.
|
|
+ */
|
|
+ ww_mutex_lock_acquired(ww, ww_ctx);
|
|
+ ww->ctx = ww_ctx;
|
|
+
|
|
+ /*
|
|
+ * Give any possible sleeping processes the chance to wake up,
|
|
+ * so they can recheck if they have to back off.
|
|
+ */
|
|
+ rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root,
|
|
+ tree_entry) {
|
|
+ /* XXX debug rt mutex waiter wakeup */
|
|
+
|
|
+ BUG_ON(waiter->lock != lock);
|
|
+ rt_mutex_wake_waiter(waiter);
|
|
+ }
|
|
+}
|
|
+
|
|
+#else
|
|
+
|
|
+static void ww_mutex_account_lock(struct rt_mutex *lock,
|
|
+ struct ww_acquire_ctx *ww_ctx)
|
|
+{
|
|
+ BUG();
|
|
+}
|
|
+#endif
|
|
+
|
|
int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
|
|
struct hrtimer_sleeper *timeout,
|
|
enum rtmutex_chainwalk chwalk,
|
|
+ struct ww_acquire_ctx *ww_ctx,
|
|
struct rt_mutex_waiter *waiter)
|
|
{
|
|
int ret;
|
|
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+ if (ww_ctx) {
|
|
+ struct ww_mutex *ww;
|
|
+
|
|
+ ww = container_of(lock, struct ww_mutex, base.lock);
|
|
+ if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
|
|
+ return -EALREADY;
|
|
+ }
|
|
+#endif
|
|
+
|
|
/* Try to acquire the lock again: */
|
|
- if (try_to_take_rt_mutex(lock, current, NULL))
|
|
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
|
|
+ if (ww_ctx)
|
|
+ ww_mutex_account_lock(lock, ww_ctx);
|
|
return 0;
|
|
+ }
|
|
|
|
set_current_state(state);
|
|
|
|
@@ -1577,14 +1709,24 @@ int __sched rt_mutex_slowlock_locked(str
|
|
|
|
ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk);
|
|
|
|
- if (likely(!ret))
|
|
+ if (likely(!ret)) {
|
|
/* sleep on the mutex */
|
|
- ret = __rt_mutex_slowlock(lock, state, timeout, waiter);
|
|
+ ret = __rt_mutex_slowlock(lock, state, timeout, waiter,
|
|
+ ww_ctx);
|
|
+ } else if (ww_ctx) {
|
|
+ /* ww_mutex: blocking failed; recheck stamps for -EALREADY/-EDEADLK */
|
|
+ ret = __mutex_lock_check_stamp(lock, ww_ctx);
|
|
+ BUG_ON(!ret);
|
|
+ }
|
|
|
|
if (unlikely(ret)) {
|
|
__set_current_state(TASK_RUNNING);
|
|
remove_waiter(lock, waiter);
|
|
- rt_mutex_handle_deadlock(ret, chwalk, waiter);
|
|
+ /* ww_mutex wants to report EDEADLK/EALREADY, let it */
|
|
+ if (!ww_ctx)
|
|
+ rt_mutex_handle_deadlock(ret, chwalk, waiter);
|
|
+ } else if (ww_ctx) {
|
|
+ ww_mutex_account_lock(lock, ww_ctx);
|
|
}
|
|
|
|
/*
|
|
@@ -1601,7 +1743,8 @@ int __sched rt_mutex_slowlock_locked(str
|
|
static int __sched
|
|
rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
|
struct hrtimer_sleeper *timeout,
|
|
- enum rtmutex_chainwalk chwalk)
|
|
+ enum rtmutex_chainwalk chwalk,
|
|
+ struct ww_acquire_ctx *ww_ctx)
|
|
{
|
|
struct rt_mutex_waiter waiter;
|
|
unsigned long flags;
|
|
@@ -1619,7 +1762,8 @@ rt_mutex_slowlock(struct rt_mutex *lock,
|
|
*/
|
|
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
|
|
|
- ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, &waiter);
|
|
+ ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx,
|
|
+ &waiter);
|
|
|
|
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
|
|
|
@@ -1749,14 +1893,16 @@ static bool __sched rt_mutex_slowunlock(
|
|
*/
|
|
static inline int
|
|
rt_mutex_fastlock(struct rt_mutex *lock, int state,
|
|
+ struct ww_acquire_ctx *ww_ctx,
|
|
int (*slowfn)(struct rt_mutex *lock, int state,
|
|
struct hrtimer_sleeper *timeout,
|
|
- enum rtmutex_chainwalk chwalk))
|
|
+ enum rtmutex_chainwalk chwalk,
|
|
+ struct ww_acquire_ctx *ww_ctx))
|
|
{
|
|
if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
|
|
return 0;
|
|
|
|
- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
|
|
+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx);
|
|
}
|
|
|
|
static inline int
|
|
@@ -1801,7 +1947,7 @@ rt_mutex_fastunlock(struct rt_mutex *loc
|
|
int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state)
|
|
{
|
|
might_sleep();
|
|
- return rt_mutex_fastlock(lock, state, rt_mutex_slowlock);
|
|
+ return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock);
|
|
}
|
|
|
|
/**
|
|
@@ -2245,7 +2391,7 @@ int rt_mutex_wait_proxy_lock(struct rt_m
|
|
raw_spin_lock_irq(&lock->wait_lock);
|
|
/* sleep on the mutex */
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
|
|
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
|
|
/*
|
|
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
|
|
* have to fix that up.
|
|
@@ -2315,3 +2461,97 @@ bool rt_mutex_cleanup_proxy_lock(struct
|
|
|
|
return cleanup;
|
|
}
|
|
+
|
|
+static inline int
|
|
+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
|
|
+{
|
|
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
|
|
+ unsigned int tmp;
|
|
+
|
|
+ if (ctx->deadlock_inject_countdown-- == 0) {
|
|
+ tmp = ctx->deadlock_inject_interval;
|
|
+ if (tmp > UINT_MAX/4)
|
|
+ tmp = UINT_MAX;
|
|
+ else
|
|
+ tmp = tmp*2 + tmp + tmp/2;
|
|
+
|
|
+ ctx->deadlock_inject_interval = tmp;
|
|
+ ctx->deadlock_inject_countdown = tmp;
|
|
+ ctx->contending_lock = lock;
|
|
+
|
|
+ ww_mutex_unlock(lock);
|
|
+
|
|
+ return -EDEADLK;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_PREEMPT_RT
|
|
+int __sched
|
|
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ might_sleep();
|
|
+
|
|
+ mutex_acquire_nest(&lock->base.dep_map, 0, 0,
|
|
+ ctx ? &ctx->dep_map : NULL, _RET_IP_);
|
|
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0,
|
|
+ ctx);
|
|
+ if (ret)
|
|
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
|
+ else if (!ret && ctx && ctx->acquired > 1)
|
|
+ return ww_mutex_deadlock_injection(lock, ctx);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
|
|
+
|
|
+int __sched
|
|
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ might_sleep();
|
|
+
|
|
+ mutex_acquire_nest(&lock->base.dep_map, 0, 0,
|
|
+ ctx ? &ctx->dep_map : NULL, _RET_IP_);
|
|
+ ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0,
|
|
+ ctx);
|
|
+ if (ret)
|
|
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
|
+ else if (!ret && ctx && ctx->acquired > 1)
|
|
+ return ww_mutex_deadlock_injection(lock, ctx);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
|
|
+
|
|
+void __sched ww_mutex_unlock(struct ww_mutex *lock)
|
|
+{
|
|
+ /*
|
|
+ * Drop this lock from its acquire context's accounting (if it
|
|
+ * has one) before releasing the underlying rt_mutex:
|
|
+ */
|
|
+ if (lock->ctx) {
|
|
+#ifdef CONFIG_DEBUG_MUTEXES
|
|
+ DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
|
|
+#endif
|
|
+ if (lock->ctx->acquired > 0)
|
|
+ lock->ctx->acquired--;
|
|
+ lock->ctx = NULL;
|
|
+ }
|
|
+
|
|
+ mutex_release(&lock->base.dep_map, _RET_IP_);
|
|
+ __rt_mutex_unlock(&lock->base.lock);
|
|
+}
|
|
+EXPORT_SYMBOL(ww_mutex_unlock);
|
|
+
|
|
+int __rt_mutex_owner_current(struct rt_mutex *lock)
|
|
+{
|
|
+ return rt_mutex_owner(lock) == current;
|
|
+}
|
|
+EXPORT_SYMBOL(__rt_mutex_owner_current);
|
|
+#endif
|
|
--- a/kernel/locking/rtmutex_common.h
|
|
+++ b/kernel/locking/rtmutex_common.h
|
|
@@ -159,6 +159,7 @@ extern void rt_mutex_postunlock(struct w
|
|
struct wake_q_head *wake_sleeper_q);
|
|
|
|
/* RW semaphore special interface */
|
|
+struct ww_acquire_ctx;
|
|
|
|
extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state);
|
|
extern int __rt_mutex_trylock(struct rt_mutex *lock);
|
|
@@ -166,6 +167,7 @@ extern void __rt_mutex_unlock(struct rt_
|
|
int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state,
|
|
struct hrtimer_sleeper *timeout,
|
|
enum rtmutex_chainwalk chwalk,
|
|
+ struct ww_acquire_ctx *ww_ctx,
|
|
struct rt_mutex_waiter *waiter);
|
|
void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock,
|
|
struct rt_mutex_waiter *waiter,
|
|
--- a/kernel/locking/rwsem-rt.c
|
|
+++ b/kernel/locking/rwsem-rt.c
|
|
@@ -138,7 +138,7 @@ static int __sched __down_read_common(st
|
|
*/
|
|
rt_mutex_init_waiter(&waiter, false);
|
|
ret = rt_mutex_slowlock_locked(m, state, NULL, RT_MUTEX_MIN_CHAINWALK,
|
|
- &waiter);
|
|
+ NULL, &waiter);
|
|
/*
|
|
* The slowlock() above is guaranteed to return with the rtmutex (for
|
|
* ret = 0) is now held, so there can't be a writer active. Increment
|