From af9f006393b53409be0ca83ae234bef840cdef4a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 8 Sep 2023 18:22:49 +0200 Subject: locking/rtmutex: Avoid unconditional slowpath for DEBUG_RT_MUTEXES With DEBUG_RT_MUTEXES enabled the fast-path rt_mutex_cmpxchg_acquire() always fails and all lock operations take the slow path. Provide a new helper inline rt_mutex_try_acquire() which maps to rt_mutex_cmpxchg_acquire() in the non-debug case. For the debug case it invokes rt_mutex_slowtrylock() which can acquire a non-contended rtmutex under full debug coverage. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-3-bigeasy@linutronix.de --- kernel/locking/rtmutex.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'kernel/locking/rtmutex.c') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 21db0df0eb00..bcec0533a0cc 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, return try_cmpxchg_acquire(&lock->owner, &old, new); } +static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ + return rt_mutex_cmpxchg_acquire(lock, NULL, current); +} + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, struct task_struct *old, struct task_struct *new) @@ -297,6 +302,20 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, } +static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); + +static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ + /* + * With debug enabled rt_mutex_cmpxchg trylock() will always fail. + * + * Avoid unconditionally taking the slow path by using + * rt_mutex_slow_trylock() which is covered by the debug code and can + * acquire a non-contended rtmutex. + */ + return rt_mutex_slowtrylock(lock); +} + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, struct task_struct *old, struct task_struct *new) @@ -1755,7 +1774,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, unsigned int state) { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + if (likely(rt_mutex_try_acquire(lock))) return 0; return rt_mutex_slowlock(lock, NULL, state); -- cgit v1.2.3 From d14f9e930b9073de264c106bf04968286ef9b3a4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 8 Sep 2023 18:22:52 +0200 Subject: locking/rtmutex: Use rt_mutex specific scheduler helpers Have rt_mutex use the rt_mutex specific scheduler helpers to avoid recursion vs rtlock on the PI state. [[ peterz: adapted to new names ]] Reported-by: Crystal Wood Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-6-bigeasy@linutronix.de --- kernel/futex/pi.c | 11 +++++++++++ kernel/locking/rtmutex.c | 14 ++++++++++++-- kernel/locking/rwbase_rt.c | 6 ++++++ kernel/locking/rwsem.c | 8 +++++++- kernel/locking/spinlock_rt.c | 4 ++++ 5 files changed, 40 insertions(+), 3 deletions(-) (limited to 'kernel/locking/rtmutex.c') diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index ce2889f12375..f8e65b27d9d6 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "futex.h" @@ -1002,6 +1003,12 @@ retry_private: goto no_block; } + /* + * Must be done before we enqueue the waiter, here is unfortunately + * under the hb lock, but that *should* work because it does nothing. + */ + rt_mutex_pre_schedule(); + rt_mutex_init_waiter(&rt_waiter); /* @@ -1052,6 +1059,10 @@ cleanup: if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) ret = 0; + /* + * Waiter is unqueued. + */ + rt_mutex_post_schedule(); no_block: /* * Fixup the pi_state owner and possibly acquire the lock if we diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index bcec0533a0cc..a3fe05dfd0d8 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1632,7 +1632,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, raw_spin_unlock_irq(&lock->wait_lock); if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) - schedule(); + rt_mutex_schedule(); raw_spin_lock_irq(&lock->wait_lock); set_current_state(state); @@ -1661,7 +1661,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, WARN(1, "rtmutex deadlock detected\n"); while (1) { set_current_state(TASK_INTERRUPTIBLE); - schedule(); + rt_mutex_schedule(); } } @@ -1756,6 +1756,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, unsigned long flags; int ret; + /* + * Do all pre-schedule work here, before we queue a waiter and invoke + * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would + * otherwise recurse back into task_blocks_on_rt_mutex() through + * rtlock_slowlock() and will then enqueue a second waiter for this + * same task and things get really confusing real fast. + */ + rt_mutex_pre_schedule(); + /* * Technically we could use raw_spin_[un]lock_irq() here, but this can * be called in early boot if the cmpxchg() fast path is disabled @@ -1767,6 +1776,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, raw_spin_lock_irqsave(&lock->wait_lock, flags); ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + rt_mutex_post_schedule(); return ret; } diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 25ec0239477c..c7258cb32d91 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -71,6 +71,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, struct rt_mutex_base *rtm = &rwb->rtmutex; int ret; + rwbase_pre_schedule(); raw_spin_lock_irq(&rtm->wait_lock); /* @@ -125,6 +126,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, rwbase_rtmutex_unlock(rtm); trace_contention_end(rwb, ret); + rwbase_post_schedule(); return ret; } @@ -237,6 +239,8 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, /* Force readers into slow path */ atomic_sub(READER_BIAS, &rwb->readers); + rwbase_pre_schedule(); + raw_spin_lock_irqsave(&rtm->wait_lock, flags); if (__rwbase_write_trylock(rwb)) goto out_unlock; @@ -248,6 +252,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, if (rwbase_signal_pending_state(state, current)) { rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); + rwbase_post_schedule(); trace_contention_end(rwb, -EINTR); return -EINTR; } @@ -266,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, out_unlock: raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); + rwbase_post_schedule(); return 0; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 9eabd585ce7a..2340b6d90ec6 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1427,8 +1427,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) #define rwbase_signal_pending_state(state, current) \ signal_pending_state(state, current) +#define rwbase_pre_schedule() \ + rt_mutex_pre_schedule() + #define rwbase_schedule() \ - schedule() + rt_mutex_schedule() + +#define rwbase_post_schedule() \ + rt_mutex_post_schedule() #include "rwbase_rt.c" diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 48a19ed8486d..842037b2ba54 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -184,9 +184,13 @@ static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm) #define rwbase_signal_pending_state(state, current) (0) +#define rwbase_pre_schedule() + #define rwbase_schedule() \ schedule_rtlock() +#define rwbase_post_schedule() + #include "rwbase_rt.c" /* * The common functions which get wrapped into the rwlock API. -- cgit v1.2.3 From 45f67f30a22f264bc7a0a61255c2ee1a838e9403 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 8 Sep 2023 18:22:53 +0200 Subject: locking/rtmutex: Add a lockdep assert to catch potential nested blocking There used to be a BUG_ON(current->pi_blocked_on) in the lock acquisition functions, but that vanished in one of the rtmutex overhauls. Bring it back in form of a lockdep assert to catch code paths which take rtmutex based locks with current::pi_blocked_on != NULL. Reported-by: Crystal Wood Signed-off-by: Thomas Gleixner Signed-off-by: "Peter Zijlstra (Intel)" Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-7-bigeasy@linutronix.de --- kernel/locking/rtmutex.c | 2 ++ kernel/locking/rwbase_rt.c | 2 ++ kernel/locking/spinlock_rt.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'kernel/locking/rtmutex.c') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a3fe05dfd0d8..4a10e8c16fd2 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1784,6 +1784,8 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, unsigned int state) { + lockdep_assert(!current->pi_blocked_on); + if (likely(rt_mutex_try_acquire(lock))) return 0; diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index c7258cb32d91..34a59569db6b 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -133,6 +133,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, unsigned int state) { + lockdep_assert(!current->pi_blocked_on); + if (rwbase_read_trylock(rwb)) return 0; diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 842037b2ba54..38e292454fcc 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -37,6 +37,8 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) { + lockdep_assert(!current->pi_blocked_on); + if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) rtlock_slowlock(rtm); } -- cgit v1.2.3