diff options
Diffstat (limited to 'kernel/locking')
| -rw-r--r-- | kernel/locking/mutex.c | 65 | ||||
| -rw-r--r-- | kernel/locking/percpu-rwsem.c | 29 | ||||
| -rw-r--r-- | kernel/locking/rtmutex.c | 8 | ||||
| -rw-r--r-- | kernel/locking/rtmutex_api.c | 33 | ||||
| -rw-r--r-- | kernel/locking/rwbase_rt.c | 6 | ||||
| -rw-r--r-- | kernel/locking/rwsem.c | 10 | ||||
| -rw-r--r-- | kernel/locking/semaphore.c | 4 | ||||
| -rw-r--r-- | kernel/locking/ww_mutex.h | 4 |
8 files changed, 144 insertions, 15 deletions
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 09534628dc01..8a85912d7ee6 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -763,6 +763,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas raw_spin_lock_irqsave(&lock->wait_lock, flags); raw_spin_lock(&current->blocked_lock); __set_task_blocked_on(current, lock); + set_current_state(state); if (opt_acquired) break; @@ -980,9 +981,8 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) __releases(lock) { - struct task_struct *next = NULL; + struct task_struct *donor, *next = NULL; struct mutex_waiter *waiter; - DEFINE_WAKE_Q(wake_q); unsigned long owner; unsigned long flags; @@ -990,6 +990,14 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne __release(lock); /* + * Ensures the proxy donor stack is stable across unlock and handoff. + * Specifically, it avoids the case where current->blocked_donor is + * NULL when it is inspected while doing the unlock, but a preemption + * before taking the wake_lock would make it set and a hand-off is + * missed. + */ + guard(preempt)(); + /* * Release the lock before (potentially) taking the spinlock such that * other contenders can get on with things ASAP. 
* @@ -1001,6 +1009,12 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne MUTEX_WARN_ON(__owner_task(owner) != current); MUTEX_WARN_ON(owner & MUTEX_FLAG_PICKUP); + if (sched_proxy_exec() && current->blocked_donor) { + /* force handoff if we have a blocked_donor */ + owner = MUTEX_FLAG_HANDOFF; + break; + } + if (owner & MUTEX_FLAG_HANDOFF) break; @@ -1013,20 +1027,56 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne } raw_spin_lock_irqsave(&lock->wait_lock, flags); + raw_spin_lock(&current->blocked_lock); debug_mutex_unlock(lock); + + if (sched_proxy_exec()) { + /* + * If we have a task boosting current, and that task was boosting + * current through this lock, hand the lock to that task, as that + * is the highest waiter, as selected by the scheduling function. + */ + donor = current->blocked_donor; + if (donor) { + struct mutex *next_lock; + + raw_spin_lock_nested(&donor->blocked_lock, SINGLE_DEPTH_NESTING); + next_lock = __get_task_blocked_on(donor); + if (next_lock == lock) { + next = get_task_struct(donor); + __clear_task_blocked_on(next, lock); + current->blocked_donor = NULL; + } + raw_spin_unlock(&donor->blocked_lock); + } + } + + /* + * Failing that, pick first on the wait list. 
+ */ waiter = lock->first_waiter; - if (waiter) { - next = waiter->task; + if (!next && waiter) { + next = get_task_struct(waiter->task); + raw_spin_lock_nested(&next->blocked_lock, SINGLE_DEPTH_NESTING); debug_mutex_wake_waiter(lock, waiter); - set_task_blocked_on_waking(next, lock); - wake_q_add(&wake_q, next); + __clear_task_blocked_on(next, lock); + raw_spin_unlock(&next->blocked_lock); + } + if (trace_contended_release_enabled() && waiter) + trace_call__contended_release(lock); + if (owner & MUTEX_FLAG_HANDOFF) __mutex_handoff(lock, next); - raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q); + raw_spin_unlock(&current->blocked_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + if (next) { + wake_up_process(next); + put_task_struct(next); + } } #ifndef CONFIG_DEBUG_LOCK_ALLOC @@ -1220,6 +1270,7 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible); EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin); EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end); +EXPORT_TRACEPOINT_SYMBOL_GPL(contended_release); /** * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index ef234469baac..f7e152c40d6d 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -263,6 +263,9 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) { rwsem_release(&sem->dep_map, _RET_IP_); + if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters)) + trace_call__contended_release(sem); + /* * Signal the writer is done, no fast path yet. * @@ -288,3 +291,29 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) rcu_sync_exit(&sem->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); + +void __percpu_up_read(struct percpu_rw_semaphore *sem) +{ + lockdep_assert_preemption_disabled(); + /* + * After percpu_up_write() completes, rcu_sync_is_idle() can still + * return false during the grace period, forcing readers into this + * slowpath. 
Only trace when a writer is actually waiting for + * readers to drain. + */ + if (trace_contended_release_enabled() && rcuwait_active(&sem->writer)) + trace_call__contended_release(sem); + /* + * slowpath; reader will only ever wake a single blocked + * writer. + */ + smp_mb(); /* B matches C */ + /* + * In other words, if they see our decrement (presumably to + * aggregate zero, as that is the only time it matters) they + * will also see our critical section. + */ + this_cpu_dec(*sem->read_count); + rcuwait_wake_up(&sem->writer); +} +EXPORT_SYMBOL_GPL(__percpu_up_read); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 4f386ea6c792..4728631ae719 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -484,6 +484,7 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod static __always_inline void rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { lockdep_assert_held(&lock->wait_lock); @@ -492,6 +493,7 @@ rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) static __always_inline void rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { lockdep_assert_held(&lock->wait_lock); @@ -1092,6 +1094,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, static int __sched try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task, struct rt_mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { lockdep_assert_held(&lock->wait_lock); @@ -1319,6 +1322,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, */ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { struct rt_mutex_waiter *waiter; @@ -1466,6 +1470,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock) raw_spin_lock_irqsave(&lock->wait_lock, flags); } + 
trace_contended_release(lock); /* * The wakeup next waiter path does not suffer from the above * race. See the comments there. @@ -1558,6 +1563,9 @@ static void __sched remove_waiter(struct rt_mutex_base *lock, lockdep_assert_held(&lock->wait_lock); + if (!waiter_task) /* never enqueued */ + return; + scoped_guard(raw_spinlock, &waiter_task->pi_lock) { rt_mutex_dequeue(lock, waiter); waiter_task->pi_blocked_on = NULL; diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 124219aea46e..5d48d64725b1 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -41,6 +41,7 @@ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, unsigned int state, struct lockdep_map *nest_lock, unsigned int subclass) + __cond_acquires(0, lock) { int ret; @@ -67,13 +68,27 @@ EXPORT_SYMBOL(rt_mutex_base_init); */ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass); + if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass) == 0) + return; + /* + * The code below is never reached because __rt_mutex_lock_common() only + * returns an error code if interrupted by a signal or upon a timeout. + */ + WARN_ON_ONCE(true); + __acquire(lock); } EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0); + if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0) == 0) + return; + /* + * The code below is never reached because __rt_mutex_lock_common() only + * returns an error code if interrupted by a signal or upon a timeout. 
+ */ + WARN_ON_ONCE(true); + __acquire(lock); } EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); @@ -86,7 +101,14 @@ EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0); + if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0) == 0) + return; + /* + * The code below is never reached because __rt_mutex_lock_common() only + * returns an error code if interrupted by a signal or upon a timeout. + */ + WARN_ON_ONCE(true); + __acquire(lock); } EXPORT_SYMBOL_GPL(rt_mutex_lock); #endif @@ -157,6 +179,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) { mutex_release(&lock->dep_map, _RET_IP_); __rt_mutex_unlock(&lock->rtmutex); + __release(lock); } EXPORT_SYMBOL_GPL(rt_mutex_unlock); @@ -182,6 +205,7 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock) */ bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, struct rt_wake_q_head *wqh) + __must_hold(&lock->wait_lock) { lockdep_assert_held(&lock->wait_lock); @@ -312,6 +336,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { int ret; @@ -365,7 +390,7 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, raw_spin_lock_irq(&lock->wait_lock); ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q); - if (unlikely(ret)) + if (unlikely(ret < 0)) remove_waiter(lock, waiter); preempt_disable(); raw_spin_unlock_irq(&lock->wait_lock); diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 82e078c0665a..2835c9ef9b3f 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -174,6 +174,8 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb, static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, unsigned int state) { + if (trace_contended_release_enabled() && 
rt_mutex_owner(&rwb->rtmutex)) + trace_call__contended_release(rwb); /* * rwb->readers can only hit 0 when a writer is waiting for the * active readers to leave the critical section. @@ -205,6 +207,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb) unsigned long flags; raw_spin_lock_irqsave(&rtm->wait_lock, flags); + if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm)) + trace_call__contended_release(rwb); __rwbase_write_unlock(rwb, WRITER_BIAS, flags); } @@ -214,6 +218,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb) unsigned long flags; raw_spin_lock_irqsave(&rtm->wait_lock, flags); + if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm)) + trace_call__contended_release(rwb); /* Release it and account current as reader */ __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags); } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index bf647097369c..b9c180ac1eee 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1387,6 +1387,8 @@ static inline void __up_read(struct rw_semaphore *sem) rwsem_clear_reader_owned(sem); tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count); DEBUG_RWSEMS_WARN_ON(tmp < 0, sem); + if (trace_contended_release_enabled() && (tmp & RWSEM_FLAG_WAITERS)) + trace_call__contended_release(sem); if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) == RWSEM_FLAG_WAITERS)) { clear_nonspinnable(sem); @@ -1413,8 +1415,10 @@ static inline void __up_write(struct rw_semaphore *sem) preempt_disable(); rwsem_clear_owner(sem); tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count); - if (unlikely(tmp & RWSEM_FLAG_WAITERS)) + if (unlikely(tmp & RWSEM_FLAG_WAITERS)) { + trace_contended_release(sem); rwsem_wake(sem); + } preempt_enable(); } @@ -1437,8 +1441,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem) tmp = atomic_long_fetch_add_release( -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count); 
rwsem_set_reader_owned(sem); - if (tmp & RWSEM_FLAG_WAITERS) + if (tmp & RWSEM_FLAG_WAITERS) { + trace_contended_release(sem); rwsem_downgrade_wake(sem); + } preempt_enable(); } diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 74d41433ba13..233730c25933 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -230,6 +230,10 @@ void __sched up(struct semaphore *sem) sem->count++; else __up(sem, &wake_q); + + if (trace_contended_release_enabled() && !wake_q_empty(&wake_q)) + trace_call__contended_release(sem); + raw_spin_unlock_irqrestore(&sem->lock, flags); if (!wake_q_empty(&wake_q)) wake_up_q(&wake_q); diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 6c12452097e1..d62b49b53ec3 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -324,7 +324,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, * blocked_on to PROXY_WAKING. Otherwise we can see * circular blocked_on relationships that can't resolve. */ - set_task_blocked_on_waking(waiter->task, lock); + clear_task_blocked_on(waiter->task, lock); wake_q_add(wake_q, waiter->task); } @@ -383,7 +383,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * are waking the mutex owner, who may be currently * blocked on a different mutex. */ - set_task_blocked_on_waking(owner, NULL); + clear_task_blocked_on(owner, NULL); wake_q_add(wake_q, owner); } return true; |
