summary | refs | log | tree | commit | diff
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r-- kernel/locking/mutex.c 65
-rw-r--r-- kernel/locking/percpu-rwsem.c 29
-rw-r--r-- kernel/locking/rtmutex.c 8
-rw-r--r-- kernel/locking/rtmutex_api.c 33
-rw-r--r-- kernel/locking/rwbase_rt.c 6
-rw-r--r-- kernel/locking/rwsem.c 10
-rw-r--r-- kernel/locking/semaphore.c 4
-rw-r--r-- kernel/locking/ww_mutex.h 4
8 files changed, 144 insertions, 15 deletions
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 09534628dc01..8a85912d7ee6 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -763,6 +763,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
raw_spin_lock_irqsave(&lock->wait_lock, flags);
raw_spin_lock(&current->blocked_lock);
__set_task_blocked_on(current, lock);
+ set_current_state(state);
if (opt_acquired)
break;
@@ -980,9 +981,8 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
__releases(lock)
{
- struct task_struct *next = NULL;
+ struct task_struct *donor, *next = NULL;
struct mutex_waiter *waiter;
- DEFINE_WAKE_Q(wake_q);
unsigned long owner;
unsigned long flags;
@@ -990,6 +990,14 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
__release(lock);
/*
+ * Ensures the proxy donor stack is stable across unlock and handoff.
+ * Specifically, it avoids the case where current->blocked_donor is
+ * NULL when it is inspected while doing the unlock, but a preemption
+ * before taking the wait_lock causes it to be set, so that the
+ * hand-off is missed.
+ */
+ guard(preempt)();
+ /*
* Release the lock before (potentially) taking the spinlock such that
* other contenders can get on with things ASAP.
*
@@ -1001,6 +1009,12 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
MUTEX_WARN_ON(__owner_task(owner) != current);
MUTEX_WARN_ON(owner & MUTEX_FLAG_PICKUP);
+ if (sched_proxy_exec() && current->blocked_donor) {
+ /* force handoff if we have a blocked_donor */
+ owner = MUTEX_FLAG_HANDOFF;
+ break;
+ }
+
if (owner & MUTEX_FLAG_HANDOFF)
break;
@@ -1013,20 +1027,56 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
}
raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ raw_spin_lock(&current->blocked_lock);
debug_mutex_unlock(lock);
+
+ if (sched_proxy_exec()) {
+ /*
+ * If we have a task boosting current, and that task was boosting
+ * current through this lock, hand the lock to that task, as that
+ * is the highest waiter, as selected by the scheduling function.
+ */
+ donor = current->blocked_donor;
+ if (donor) {
+ struct mutex *next_lock;
+
+ raw_spin_lock_nested(&donor->blocked_lock, SINGLE_DEPTH_NESTING);
+ next_lock = __get_task_blocked_on(donor);
+ if (next_lock == lock) {
+ next = get_task_struct(donor);
+ __clear_task_blocked_on(next, lock);
+ current->blocked_donor = NULL;
+ }
+ raw_spin_unlock(&donor->blocked_lock);
+ }
+ }
+
+ /*
+ * Failing that, pick first on the wait list.
+ */
waiter = lock->first_waiter;
- if (waiter) {
- next = waiter->task;
+ if (!next && waiter) {
+ next = get_task_struct(waiter->task);
+ raw_spin_lock_nested(&next->blocked_lock, SINGLE_DEPTH_NESTING);
debug_mutex_wake_waiter(lock, waiter);
- set_task_blocked_on_waking(next, lock);
- wake_q_add(&wake_q, next);
+ __clear_task_blocked_on(next, lock);
+ raw_spin_unlock(&next->blocked_lock);
+
}
+ if (trace_contended_release_enabled() && waiter)
+ trace_call__contended_release(lock);
+
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
- raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
+ raw_spin_unlock(&current->blocked_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+ if (next) {
+ wake_up_process(next);
+ put_task_struct(next);
+ }
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -1220,6 +1270,7 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin);
EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(contended_release);
/**
* atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ef234469baac..f7e152c40d6d 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,9 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters))
+ trace_call__contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -288,3 +291,29 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+ lockdep_assert_preemption_disabled();
+ /*
+ * After percpu_up_write() completes, rcu_sync_is_idle() can still
+ * return false during the grace period, forcing readers into this
+ * slowpath. Only trace when a writer is actually waiting for
+ * readers to drain.
+ */
+ if (trace_contended_release_enabled() && rcuwait_active(&sem->writer))
+ trace_call__contended_release(sem);
+ /*
+ * slowpath; reader will only ever wake a single blocked
+ * writer.
+ */
+ smp_mb(); /* B matches C */
+ /*
+ * In other words, if they see our decrement (presumably to
+ * aggregate zero, as that is the only time it matters) they
+ * will also see our critical section.
+ */
+ this_cpu_dec(*sem->read_count);
+ rcuwait_wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4f386ea6c792..4728631ae719 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -484,6 +484,7 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
lockdep_assert_held(&lock->wait_lock);
@@ -492,6 +493,7 @@ rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
lockdep_assert_held(&lock->wait_lock);
@@ -1092,6 +1094,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
static int __sched
try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
lockdep_assert_held(&lock->wait_lock);
@@ -1319,6 +1322,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
*/
static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
struct rt_mutex_waiter *waiter;
@@ -1466,6 +1470,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
@@ -1558,6 +1563,9 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
lockdep_assert_held(&lock->wait_lock);
+ if (!waiter_task) /* never enqueued */
+ return;
+
scoped_guard(raw_spinlock, &waiter_task->pi_lock) {
rt_mutex_dequeue(lock, waiter);
waiter_task->pi_blocked_on = NULL;
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 124219aea46e..5d48d64725b1 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -41,6 +41,7 @@ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock,
unsigned int state,
struct lockdep_map *nest_lock,
unsigned int subclass)
+ __cond_acquires(0, lock)
{
int ret;
@@ -67,13 +68,27 @@ EXPORT_SYMBOL(rt_mutex_base_init);
*/
void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass)
{
- __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass);
+ if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass) == 0)
+ return;
+ /*
+ * The code below is never reached because __rt_mutex_lock_common() only
+ * returns an error code if interrupted by a signal or upon a timeout.
+ */
+ WARN_ON_ONCE(true);
+ __acquire(lock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_nested);
void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock)
{
- __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0);
+ if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0) == 0)
+ return;
+ /*
+ * The code below is never reached because __rt_mutex_lock_common() only
+ * returns an error code if interrupted by a signal or upon a timeout.
+ */
+ WARN_ON_ONCE(true);
+ __acquire(lock);
}
EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
@@ -86,7 +101,14 @@ EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock);
*/
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
- __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0);
+ if (__rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0) == 0)
+ return;
+ /*
+ * The code below is never reached because __rt_mutex_lock_common() only
+ * returns an error code if interrupted by a signal or upon a timeout.
+ */
+ WARN_ON_ONCE(true);
+ __acquire(lock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
#endif
@@ -157,6 +179,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
mutex_release(&lock->dep_map, _RET_IP_);
__rt_mutex_unlock(&lock->rtmutex);
+ __release(lock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
@@ -182,6 +205,7 @@ int __sched __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
*/
bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
struct rt_wake_q_head *wqh)
+ __must_hold(&lock->wait_lock)
{
lockdep_assert_held(&lock->wait_lock);
@@ -312,6 +336,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
struct wake_q_head *wake_q)
+ __must_hold(&lock->wait_lock)
{
int ret;
@@ -365,7 +390,7 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
raw_spin_lock_irq(&lock->wait_lock);
ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
- if (unlikely(ret))
+ if (unlikely(ret < 0))
remove_waiter(lock, waiter);
preempt_disable();
raw_spin_unlock_irq(&lock->wait_lock);
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 82e078c0665a..2835c9ef9b3f 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -174,6 +174,8 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
unsigned int state)
{
+ if (trace_contended_release_enabled() && rt_mutex_owner(&rwb->rtmutex))
+ trace_call__contended_release(rwb);
/*
* rwb->readers can only hit 0 when a writer is waiting for the
* active readers to leave the critical section.
@@ -205,6 +207,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
@@ -214,6 +218,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+ trace_call__contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bf647097369c..b9c180ac1eee 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1387,6 +1387,8 @@ static inline void __up_read(struct rw_semaphore *sem)
rwsem_clear_reader_owned(sem);
tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+ if (trace_contended_release_enabled() && (tmp & RWSEM_FLAG_WAITERS))
+ trace_call__contended_release(sem);
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
@@ -1413,8 +1415,10 @@ static inline void __up_write(struct rw_semaphore *sem)
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
@@ -1437,8 +1441,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 74d41433ba13..233730c25933 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -230,6 +230,10 @@ void __sched up(struct semaphore *sem)
sem->count++;
else
__up(sem, &wake_q);
+
+ if (trace_contended_release_enabled() && !wake_q_empty(&wake_q))
+ trace_call__contended_release(sem);
+
raw_spin_unlock_irqrestore(&sem->lock, flags);
if (!wake_q_empty(&wake_q))
wake_up_q(&wake_q);
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 6c12452097e1..d62b49b53ec3 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -324,7 +324,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
* blocked_on to PROXY_WAKING. Otherwise we can see
* circular blocked_on relationships that can't resolve.
*/
- set_task_blocked_on_waking(waiter->task, lock);
+ clear_task_blocked_on(waiter->task, lock);
wake_q_add(wake_q, waiter->task);
}
@@ -383,7 +383,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
* are waking the mutex owner, who may be currently
* blocked on a different mutex.
*/
- set_task_blocked_on_waking(owner, NULL);
+ clear_task_blocked_on(owner, NULL);
wake_q_add(wake_q, owner);
}
return true;