summaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/Makefile8
-rw-r--r--kernel/locking/lock_events_list.h33
-rw-r--r--kernel/locking/lockdep.c135
-rw-r--r--kernel/locking/lockdep_internals.h19
-rw-r--r--kernel/locking/lockdep_proc.c4
-rw-r--r--kernel/locking/locktorture.c92
-rw-r--r--kernel/locking/mcs_spinlock.h10
-rw-r--r--kernel/locking/mutex-debug.c24
-rw-r--r--kernel/locking/mutex.c202
-rw-r--r--kernel/locking/mutex.h15
-rw-r--r--kernel/locking/percpu-rwsem.c15
-rw-r--r--kernel/locking/qspinlock.c193
-rw-r--r--kernel/locking/qspinlock.h201
-rw-r--r--kernel/locking/rtmutex.c63
-rw-r--r--kernel/locking/rtmutex_api.c74
-rw-r--r--kernel/locking/rtmutex_common.h36
-rw-r--r--kernel/locking/rwbase_rt.c1
-rw-r--r--kernel/locking/rwsem.c148
-rw-r--r--kernel/locking/semaphore.c98
-rw-r--r--kernel/locking/spinlock.c12
-rw-r--r--kernel/locking/spinlock_debug.c4
-rw-r--r--kernel/locking/test-ww_mutex.c176
-rw-r--r--kernel/locking/ww_mutex.h103
-rw-r--r--kernel/locking/ww_rt_mutex.c1
24 files changed, 1133 insertions, 534 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 0db4093d17b8..cee1901d4cff 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,9 +3,15 @@
# and is generally not a function of system call inputs.
KCOV_INSTRUMENT := n
+CONTEXT_ANALYSIS_mutex.o := y
+CONTEXT_ANALYSIS_rtmutex_api.o := y
+CONTEXT_ANALYSIS_ww_rt_mutex.o := y
+CONTEXT_ANALYSIS_rwsem.o := y
+
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
-# Avoid recursion lockdep -> sanitizer -> ... -> lockdep.
+# Avoid recursion lockdep -> sanitizer -> ... -> lockdep & improve performance.
+KASAN_SANITIZE_lockdep.o := n
KCSAN_SANITIZE_lockdep.o := n
ifdef CONFIG_FUNCTION_TRACER
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 97fb6f3f840a..4e36258cc34f 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -50,6 +50,11 @@ LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
#endif /* CONFIG_QUEUED_SPINLOCKS */
/*
+ * Locking events for Resilient Queued Spin Lock
+ */
+LOCK_EVENT(rqspinlock_lock_timeout) /* # of locking ops that timeout */
+
+/*
* Locking events for rwsem
*/
LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
@@ -67,3 +72,31 @@ LOCK_EVENT(rwsem_rlock_handoff) /* # of read lock handoffs */
LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
LOCK_EVENT(rwsem_wlock_handoff) /* # of write lock handoffs */
+
+/*
+ * Locking events for rtlock_slowlock()
+ */
+LOCK_EVENT(rtlock_slowlock) /* # of rtlock_slowlock() calls */
+LOCK_EVENT(rtlock_slow_acq1) /* # of locks acquired after wait_lock */
+LOCK_EVENT(rtlock_slow_acq2) /* # of locks acquired in for loop */
+LOCK_EVENT(rtlock_slow_sleep) /* # of sleeps */
+LOCK_EVENT(rtlock_slow_wake) /* # of wakeup's */
+
+/*
+ * Locking events for rt_mutex_slowlock()
+ */
+LOCK_EVENT(rtmutex_slowlock) /* # of rt_mutex_slowlock() calls */
+LOCK_EVENT(rtmutex_slow_block) /* # of rt_mutex_slowlock_block() calls */
+LOCK_EVENT(rtmutex_slow_acq1) /* # of locks acquired after wait_lock */
+LOCK_EVENT(rtmutex_slow_acq2) /* # of locks acquired at the end */
+LOCK_EVENT(rtmutex_slow_acq3) /* # of locks acquired in *block() */
+LOCK_EVENT(rtmutex_slow_sleep) /* # of sleeps */
+LOCK_EVENT(rtmutex_slow_wake) /* # of wakeup's */
+LOCK_EVENT(rtmutex_deadlock) /* # of rt_mutex_handle_deadlock()'s */
+
+/*
+ * Locking events for lockdep
+ */
+LOCK_EVENT(lockdep_acquire)
+LOCK_EVENT(lockdep_lock)
+LOCK_EVENT(lockdep_nocheck)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4470680f0226..2d4c5bab5af8 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -57,10 +57,12 @@
#include <linux/lockdep.h>
#include <linux/context_tracking.h>
#include <linux/console.h>
+#include <linux/kasan.h>
#include <asm/sections.h>
#include "lockdep_internals.h"
+#include "lock_events.h"
#include <trace/events/lock.h>
@@ -170,6 +172,7 @@ static struct task_struct *lockdep_selftest_task_struct;
static int graph_lock(void)
{
lockdep_lock();
+ lockevent_inc(lockdep_lock);
/*
* Make sure that if another CPU detected a bug while
* walking the graph we dont change it (while the other
@@ -216,6 +219,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
unsigned long nr_lock_classes;
unsigned long nr_zapped_classes;
+unsigned long nr_dynamic_keys;
unsigned long max_lock_class_idx;
struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
@@ -293,33 +297,30 @@ static inline void lock_time_add(struct lock_time *src, struct lock_time *dst)
dst->nr += src->nr;
}
-struct lock_class_stats lock_stats(struct lock_class *class)
+void lock_stats(struct lock_class *class, struct lock_class_stats *stats)
{
- struct lock_class_stats stats;
int cpu, i;
- memset(&stats, 0, sizeof(struct lock_class_stats));
+ memset(stats, 0, sizeof(struct lock_class_stats));
for_each_possible_cpu(cpu) {
struct lock_class_stats *pcs =
&per_cpu(cpu_lock_stats, cpu)[class - lock_classes];
- for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
- stats.contention_point[i] += pcs->contention_point[i];
+ for (i = 0; i < ARRAY_SIZE(stats->contention_point); i++)
+ stats->contention_point[i] += pcs->contention_point[i];
- for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
- stats.contending_point[i] += pcs->contending_point[i];
+ for (i = 0; i < ARRAY_SIZE(stats->contending_point); i++)
+ stats->contending_point[i] += pcs->contending_point[i];
- lock_time_add(&pcs->read_waittime, &stats.read_waittime);
- lock_time_add(&pcs->write_waittime, &stats.write_waittime);
+ lock_time_add(&pcs->read_waittime, &stats->read_waittime);
+ lock_time_add(&pcs->write_waittime, &stats->write_waittime);
- lock_time_add(&pcs->read_holdtime, &stats.read_holdtime);
- lock_time_add(&pcs->write_holdtime, &stats.write_holdtime);
+ lock_time_add(&pcs->read_holdtime, &stats->read_holdtime);
+ lock_time_add(&pcs->write_holdtime, &stats->write_holdtime);
- for (i = 0; i < ARRAY_SIZE(stats.bounces); i++)
- stats.bounces[i] += pcs->bounces[i];
+ for (i = 0; i < ARRAY_SIZE(stats->bounces); i++)
+ stats->bounces[i] += pcs->bounces[i];
}
-
- return stats;
}
void clear_lock_stats(struct lock_class *class)
@@ -1235,6 +1236,7 @@ void lockdep_register_key(struct lock_class_key *key)
goto out_unlock;
}
hlist_add_head_rcu(&key->hash_entry, hash_head);
+ nr_dynamic_keys++;
out_unlock:
graph_unlock();
restore_irqs:
@@ -1974,41 +1976,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
}
/*
- * We are about to add A -> B into the dependency graph, and in __bfs() a
- * strong dependency path A -> .. -> B is found: hlock_class equals
- * entry->class.
- *
- * If A -> .. -> B can replace A -> B in any __bfs() search (means the former
- * is _stronger_ than or equal to the latter), we consider A -> B as redundant.
- * For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
- * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
- * dependency graph, as any strong path ..-> A -> B ->.. we can get with
- * having dependency A -> B, we could already get a equivalent path ..-> A ->
- * .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
- *
- * We need to make sure both the start and the end of A -> .. -> B is not
- * weaker than A -> B. For the start part, please see the comment in
- * check_redundant(). For the end part, we need:
- *
- * Either
- *
- * a) A -> B is -(*R)-> (everything is not weaker than that)
- *
- * or
- *
- * b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
- *
- */
-static inline bool hlock_equal(struct lock_list *entry, void *data)
-{
- struct held_lock *hlock = (struct held_lock *)data;
-
- return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
- (hlock->read == 2 || /* A -> B is -(*R)-> */
- !entry->only_xr); /* A -> .. -> B is -(*N)-> */
-}
-
-/*
* We are about to add B -> A into the dependency graph, and in __bfs() a
* strong dependency path A -> .. -> B is found: hlock_class equals
* entry->class.
@@ -2913,6 +2880,41 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
#ifdef CONFIG_LOCKDEP_SMALL
/*
+ * We are about to add A -> B into the dependency graph, and in __bfs() a
+ * strong dependency path A -> .. -> B is found: hlock_class equals
+ * entry->class.
+ *
+ * If A -> .. -> B can replace A -> B in any __bfs() search (means the former
+ * is _stronger_ than or equal to the latter), we consider A -> B as redundant.
+ * For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
+ * -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
+ * dependency graph, as any strong path ..-> A -> B ->.. we can get with
+ * having dependency A -> B, we could already get a equivalent path ..-> A ->
+ * .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
+ *
+ * We need to make sure both the start and the end of A -> .. -> B is not
+ * weaker than A -> B. For the start part, please see the comment in
+ * check_redundant(). For the end part, we need:
+ *
+ * Either
+ *
+ * a) A -> B is -(*R)-> (everything is not weaker than that)
+ *
+ * or
+ *
+ * b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
+ *
+ */
+static inline bool hlock_equal(struct lock_list *entry, void *data)
+{
+ struct held_lock *hlock = (struct held_lock *)data;
+
+ return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
+ (hlock->read == 2 || /* A -> B is -(*R)-> */
+ !entry->only_xr); /* A -> .. -> B is -(*N)-> */
+}
+
+/*
* Check that the dependency graph starting at <src> can lead to
* <target> or not. If it can, <src> -> <target> dependency is already
* in the graph.
@@ -5091,8 +5093,15 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (unlikely(lock->key == &__lockdep_no_track__))
return 0;
- if (!prove_locking || lock->key == &__lockdep_no_validate__)
+ lockevent_inc(lockdep_acquire);
+
+ if (!prove_locking || lock->key == &__lockdep_no_validate__) {
check = 0;
+ lockevent_inc(lockdep_nocheck);
+ }
+
+ if (DEBUG_LOCKS_WARN_ON(subclass >= MAX_LOCKDEP_SUBCLASSES))
+ return 0;
if (subclass < NR_LOCKDEP_CACHING_CLASSES)
class = lock->class_cache[subclass];
@@ -5824,6 +5833,14 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!debug_locks)
return;
+ /*
+ * As KASAN instrumentation is disabled and lock_acquire() is usually
+ * the first lockdep call when a task tries to acquire a lock, add
+ * kasan_check_byte() here to check for use-after-free and other
+ * memory errors.
+ */
+ kasan_check_byte(lock);
+
if (unlikely(!lockdep_enabled())) {
/* XXX allow trylock from NMI ?!? */
if (lockdep_nmi() && !trylock) {
@@ -6249,6 +6266,9 @@ static void zap_class(struct pending_free *pf, struct lock_class *class)
hlist_del_rcu(&class->hash_entry);
WRITE_ONCE(class->key, NULL);
WRITE_ONCE(class->name, NULL);
+ /* Class allocated but not used, -1 in nr_unused_locks */
+ if (class->usage_mask == 0)
+ debug_atomic_dec(nr_unused_locks);
nr_lock_classes--;
__clear_bit(class - lock_classes, lock_classes_in_use);
if (class - lock_classes == max_lock_class_idx)
@@ -6588,6 +6608,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
pf = get_pending_free();
__lockdep_free_key_range(pf, key, 1);
need_callback = prepare_call_rcu_zapped(pf);
+ nr_dynamic_keys--;
}
lockdep_unlock();
raw_local_irq_restore(flags);
@@ -6595,8 +6616,16 @@ void lockdep_unregister_key(struct lock_class_key *key)
if (need_callback)
call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
- /* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
- synchronize_rcu();
+ /*
+ * Wait until is_dynamic_key() has finished accessing k->hash_entry.
+ *
+ * Some operations like __qdisc_destroy() will call this in a debug
+ * kernel, and the network traffic is disabled while waiting, hence
+ * the delay of the wait matters in debugging cases. Currently use a
+ * synchronize_rcu_expedited() to speed up the wait at the cost of
+ * system IPIs. TODO: Replace RCU with hazptr for this.
+ */
+ synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(lockdep_unregister_key);
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 20f9ef58d3d0..0e5e6ffe91a3 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -47,29 +47,31 @@ enum {
__LOCKF(USED_READ)
};
+enum {
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
-static const unsigned long LOCKF_ENABLED_IRQ =
+ LOCKF_ENABLED_IRQ =
#include "lockdep_states.h"
- 0;
+ 0,
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
-static const unsigned long LOCKF_USED_IN_IRQ =
+ LOCKF_USED_IN_IRQ =
#include "lockdep_states.h"
- 0;
+ 0,
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
-static const unsigned long LOCKF_ENABLED_IRQ_READ =
+ LOCKF_ENABLED_IRQ_READ =
#include "lockdep_states.h"
- 0;
+ 0,
#undef LOCKDEP_STATE
#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
-static const unsigned long LOCKF_USED_IN_IRQ_READ =
+ LOCKF_USED_IN_IRQ_READ =
#include "lockdep_states.h"
- 0;
+ 0,
#undef LOCKDEP_STATE
+};
#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
@@ -138,6 +140,7 @@ extern unsigned long nr_lock_classes;
extern unsigned long nr_zapped_classes;
extern unsigned long nr_zapped_lock_chains;
extern unsigned long nr_list_entries;
+extern unsigned long nr_dynamic_keys;
long lockdep_next_lockchain(long i);
unsigned long lock_chain_count(void);
extern unsigned long nr_stack_trace_entries;
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 6db0f43fc4df..1916db9aa46b 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -286,6 +286,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
+ seq_printf(m, " dynamic-keys: %11lu\n",
+ nr_dynamic_keys);
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
nr_list_entries, MAX_LOCKDEP_ENTRIES);
seq_printf(m, " indirect dependencies: %11lu\n",
@@ -655,7 +657,7 @@ static int lock_stat_open(struct inode *inode, struct file *file)
if (!test_bit(idx, lock_classes_in_use))
continue;
iter->class = class;
- iter->stats = lock_stats(class);
+ lock_stats(class, &iter->stats);
iter++;
}
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index cc33470f4de9..e618bcf75e2d 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -103,8 +103,8 @@ static const struct kernel_param_ops lt_bind_ops = {
.get = param_get_cpumask,
};
-module_param_cb(bind_readers, &lt_bind_ops, &bind_readers, 0644);
-module_param_cb(bind_writers, &lt_bind_ops, &bind_writers, 0644);
+module_param_cb(bind_readers, &lt_bind_ops, &bind_readers, 0444);
+module_param_cb(bind_writers, &lt_bind_ops, &bind_writers, 0444);
long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn);
@@ -362,6 +362,60 @@ static struct lock_torture_ops raw_spin_lock_irq_ops = {
.name = "raw_spin_lock_irq"
};
+#ifdef CONFIG_BPF_SYSCALL
+
+#include <asm/rqspinlock.h>
+static rqspinlock_t rqspinlock;
+
+static int torture_raw_res_spin_write_lock(int tid __maybe_unused)
+{
+ raw_res_spin_lock(&rqspinlock);
+ return 0;
+}
+
+static void torture_raw_res_spin_write_unlock(int tid __maybe_unused)
+{
+ raw_res_spin_unlock(&rqspinlock);
+}
+
+static struct lock_torture_ops raw_res_spin_lock_ops = {
+ .writelock = torture_raw_res_spin_write_lock,
+ .write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_rt_boost,
+ .writeunlock = torture_raw_res_spin_write_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "raw_res_spin_lock"
+};
+
+static int torture_raw_res_spin_write_lock_irq(int tid __maybe_unused)
+{
+ unsigned long flags;
+
+ raw_res_spin_lock_irqsave(&rqspinlock, flags);
+ cxt.cur_ops->flags = flags;
+ return 0;
+}
+
+static void torture_raw_res_spin_write_unlock_irq(int tid __maybe_unused)
+{
+ raw_res_spin_unlock_irqrestore(&rqspinlock, cxt.cur_ops->flags);
+}
+
+static struct lock_torture_ops raw_res_spin_lock_irq_ops = {
+ .writelock = torture_raw_res_spin_write_lock_irq,
+ .write_delay = torture_spin_lock_write_delay,
+ .task_boost = torture_rt_boost,
+ .writeunlock = torture_raw_res_spin_write_unlock_irq,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "raw_res_spin_lock_irq"
+};
+
+#endif
+
static DEFINE_RWLOCK(torture_rwlock);
static int torture_rwlock_write_lock(int tid __maybe_unused)
@@ -556,9 +610,8 @@ static void torture_ww_mutex_init(void)
ww_mutex_init(&torture_ww_mutex_1, &torture_ww_class);
ww_mutex_init(&torture_ww_mutex_2, &torture_ww_class);
- ww_acquire_ctxs = kmalloc_array(cxt.nrealwriters_stress,
- sizeof(*ww_acquire_ctxs),
- GFP_KERNEL);
+ ww_acquire_ctxs = kmalloc_objs(*ww_acquire_ctxs,
+ cxt.nrealwriters_stress);
if (!ww_acquire_ctxs)
VERBOSE_TOROUT_STRING("ww_acquire_ctx: Out of memory");
}
@@ -1075,7 +1128,8 @@ static int call_rcu_chain_init(void)
if (call_rcu_chains <= 0)
return 0;
- call_rcu_chain_list = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain_list), GFP_KERNEL);
+ call_rcu_chain_list = kzalloc_objs(*call_rcu_chain_list,
+ call_rcu_chains);
if (!call_rcu_chain_list)
return -ENOMEM;
for (i = 0; i < call_rcu_chains; i++) {
@@ -1157,6 +1211,10 @@ end:
cxt.cur_ops->exit();
cxt.init_called = false;
}
+
+ free_cpumask_var(bind_readers);
+ free_cpumask_var(bind_writers);
+
torture_cleanup_end();
}
@@ -1168,6 +1226,9 @@ static int __init lock_torture_init(void)
&lock_busted_ops,
&spin_lock_ops, &spin_lock_irq_ops,
&raw_spin_lock_ops, &raw_spin_lock_irq_ops,
+#ifdef CONFIG_BPF_SYSCALL
+ &raw_res_spin_lock_ops, &raw_res_spin_lock_irq_ops,
+#endif
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
&ww_mutex_lock_ops,
@@ -1232,9 +1293,7 @@ static int __init lock_torture_init(void)
/* Initialize the statistics so that each run gets its own numbers. */
if (nwriters_stress) {
lock_is_write_held = false;
- cxt.lwsa = kmalloc_array(cxt.nrealwriters_stress,
- sizeof(*cxt.lwsa),
- GFP_KERNEL);
+ cxt.lwsa = kmalloc_objs(*cxt.lwsa, cxt.nrealwriters_stress);
if (cxt.lwsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
firsterr = -ENOMEM;
@@ -1262,9 +1321,8 @@ static int __init lock_torture_init(void)
}
if (nreaders_stress) {
- cxt.lrsa = kmalloc_array(cxt.nrealreaders_stress,
- sizeof(*cxt.lrsa),
- GFP_KERNEL);
+ cxt.lrsa = kmalloc_objs(*cxt.lrsa,
+ cxt.nrealreaders_stress);
if (cxt.lrsa == NULL) {
VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
firsterr = -ENOMEM;
@@ -1311,9 +1369,8 @@ static int __init lock_torture_init(void)
}
if (nwriters_stress) {
- writer_tasks = kcalloc(cxt.nrealwriters_stress,
- sizeof(writer_tasks[0]),
- GFP_KERNEL);
+ writer_tasks = kzalloc_objs(writer_tasks[0],
+ cxt.nrealwriters_stress);
if (writer_tasks == NULL) {
TOROUT_ERRSTRING("writer_tasks: Out of memory");
firsterr = -ENOMEM;
@@ -1326,9 +1383,8 @@ static int __init lock_torture_init(void)
nested_locks = MAX_NESTED_LOCKS;
if (cxt.cur_ops->readlock) {
- reader_tasks = kcalloc(cxt.nrealreaders_stress,
- sizeof(reader_tasks[0]),
- GFP_KERNEL);
+ reader_tasks = kzalloc_objs(reader_tasks[0],
+ cxt.nrealreaders_stress);
if (reader_tasks == NULL) {
TOROUT_ERRSTRING("reader_tasks: Out of memory");
kfree(writer_tasks);
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 85251d8771d9..5c92ba199b90 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -15,12 +15,6 @@
#include <asm/mcs_spinlock.h>
-struct mcs_spinlock {
- struct mcs_spinlock *next;
- int locked; /* 1 if lock acquired */
- int count; /* nesting count, see qspinlock.c */
-};
-
#ifndef arch_mcs_spin_lock_contended
/*
* Using smp_cond_load_acquire() provides the acquire semantics
@@ -30,9 +24,7 @@ struct mcs_spinlock {
* spinning, and smp_cond_load_acquire() provides that behavior.
*/
#define arch_mcs_spin_lock_contended(l) \
-do { \
- smp_cond_load_acquire(l, VAL); \
-} while (0)
+ smp_cond_load_acquire(l, VAL)
#endif
#ifndef arch_mcs_spin_unlock_contended
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 6e6f6071cfa2..785decd9d0c0 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -37,9 +37,8 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
{
lockdep_assert_held(&lock->wait_lock);
- DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_LOCKS_WARN_ON(!lock->first_waiter);
DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
- DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
@@ -53,17 +52,17 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
{
lockdep_assert_held(&lock->wait_lock);
- /* Mark the current thread as blocked on the lock: */
- task->blocked_on = waiter;
+ /* Current thread can't be already blocked (since it's executing!) */
+ DEBUG_LOCKS_WARN_ON(get_task_blocked_on(task));
}
void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct task_struct *task)
{
- DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+ struct mutex *blocked_on = get_task_blocked_on(task);
+
DEBUG_LOCKS_WARN_ON(waiter->task != task);
- DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
- task->blocked_on = NULL;
+ DEBUG_LOCKS_WARN_ON(blocked_on && blocked_on != lock);
INIT_LIST_HEAD(&waiter->list);
waiter->task = NULL;
@@ -73,20 +72,11 @@ void debug_mutex_unlock(struct mutex *lock)
{
if (likely(debug_locks)) {
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
- DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
}
}
-void debug_mutex_init(struct mutex *lock, const char *name,
- struct lock_class_key *key)
+void debug_mutex_init(struct mutex *lock)
{
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- /*
- * Make sure we are not reinitializing a held lock:
- */
- debug_check_no_locks_freed((void *)lock, sizeof(*lock));
- lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
-#endif
lock->magic = lock;
}
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index b36f23de48f1..09534628dc01 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -29,6 +29,7 @@
#include <linux/interrupt.h>
#include <linux/debug_locks.h>
#include <linux/osq_lock.h>
+#include <linux/hung_task.h>
#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
@@ -42,19 +43,17 @@
# define MUTEX_WARN_ON(cond)
#endif
-void
-__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
+static void __mutex_init_generic(struct mutex *lock)
{
atomic_long_set(&lock->owner, 0);
- raw_spin_lock_init(&lock->wait_lock);
- INIT_LIST_HEAD(&lock->wait_list);
+ scoped_guard (raw_spinlock_init, &lock->wait_lock) {
+ lock->first_waiter = NULL;
+ }
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
osq_lock_init(&lock->osq);
#endif
-
- debug_mutex_init(lock, name, key);
+ debug_mutex_init(lock);
}
-EXPORT_SYMBOL(__mutex_init);
static inline struct task_struct *__owner_task(unsigned long owner)
{
@@ -72,6 +71,14 @@ static inline unsigned long __owner_flags(unsigned long owner)
return owner & MUTEX_FLAGS;
}
+/* Do not use the return value as a pointer directly. */
+unsigned long mutex_get_owner(struct mutex *lock)
+{
+ unsigned long owner = atomic_long_read(&lock->owner);
+
+ return (unsigned long)__owner_task(owner);
+}
+
/*
* Returns: __mutex_owner(lock) on failure or NULL on success.
*/
@@ -133,16 +140,24 @@ static inline bool __mutex_trylock(struct mutex *lock)
* There is nothing that would stop spreading the lockdep annotations outwards
* except more code.
*/
+void mutex_init_generic(struct mutex *lock)
+{
+ __mutex_init_generic(lock);
+}
+EXPORT_SYMBOL(mutex_init_generic);
/*
* Optimistic trylock that only works in the uncontended case. Make sure to
* follow with a __mutex_trylock() before failing.
*/
static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
+ __cond_acquires(true, lock)
{
unsigned long curr = (unsigned long)current;
unsigned long zero = 0UL;
+ MUTEX_WARN_ON(lock->magic != lock);
+
if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
return true;
@@ -150,12 +165,27 @@ static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
}
static __always_inline bool __mutex_unlock_fast(struct mutex *lock)
+ __cond_releases(true, lock)
{
unsigned long curr = (unsigned long)current;
return atomic_long_try_cmpxchg_release(&lock->owner, &curr, 0UL);
}
-#endif
+
+#else /* !CONFIG_DEBUG_LOCK_ALLOC */
+
+void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key)
+{
+ __mutex_init_generic(lock);
+
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(mutex_init_lockdep);
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag)
{
@@ -167,34 +197,61 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag)
atomic_long_andnot(flag, &lock->owner);
}
-static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter)
-{
- return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter;
-}
-
/*
- * Add @waiter to a given location in the lock wait_list and set the
- * FLAG_WAITERS flag if it's the first waiter.
+ * Add @waiter to the @lock wait_list and set the FLAG_WAITERS flag if it's
+ * the first waiter.
+ *
+ * When @pos, @waiter is added before the waiter indicated by @pos. Otherwise
+ * @waiter will be added to the tail of the list.
*/
static void
__mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct list_head *list)
+ struct mutex_waiter *pos)
+ __must_hold(&lock->wait_lock)
{
+ struct mutex_waiter *first = lock->first_waiter;
+
+ hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX);
debug_mutex_add_waiter(lock, waiter, current);
- list_add_tail(&waiter->list, list);
- if (__mutex_waiter_is_first(lock, waiter))
- __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+ if (pos) {
+ /*
+ * Insert @waiter before @pos.
+ */
+ list_add_tail(&waiter->list, &pos->list);
+ /*
+ * If @pos == @first, then @waiter will be the new first.
+ */
+ if (pos == first)
+ lock->first_waiter = waiter;
+ return;
+ }
+
+ if (first) {
+ list_add_tail(&waiter->list, &first->list);
+ return;
+ }
+
+ INIT_LIST_HEAD(&waiter->list);
+ lock->first_waiter = waiter;
+ __mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
}
static void
__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
- list_del(&waiter->list);
- if (likely(list_empty(&lock->wait_list)))
+ if (list_empty(&waiter->list)) {
__mutex_clear_flag(lock, MUTEX_FLAGS);
+ lock->first_waiter = NULL;
+ } else {
+ if (lock->first_waiter == waiter)
+ lock->first_waiter = list_next_entry(waiter, list);
+ list_del(&waiter->list);
+ }
debug_mutex_remove_waiter(lock, waiter, current);
+ hung_task_clear_blocker();
}
/*
@@ -230,7 +287,8 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
* We also put the fastpath first in the kernel image, to make sure the
* branch is predicted by the CPU as default-untaken.
*/
-static void __sched __mutex_lock_slowpath(struct mutex *lock);
+static void __sched __mutex_lock_slowpath(struct mutex *lock)
+ __acquires(lock);
/**
* mutex_lock - acquire the mutex
@@ -311,7 +369,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
* Similarly, stop spinning if we are no longer the
* first waiter.
*/
- if (waiter && !__mutex_waiter_is_first(lock, waiter))
+ if (waiter && data_race(lock->first_waiter != waiter))
return false;
return true;
@@ -496,7 +554,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
}
#endif
-static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip);
+static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
+ __releases(lock);
/**
* mutex_unlock - release the mutex
@@ -536,6 +595,7 @@ EXPORT_SYMBOL(mutex_unlock);
* of a unlocked mutex is not allowed.
*/
void __sched ww_mutex_unlock(struct ww_mutex *lock)
+ __no_context_analysis
{
__ww_mutex_unlock(lock);
mutex_unlock(&lock->base);
@@ -549,6 +609,7 @@ static __always_inline int __sched
__mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
+ __cond_acquires(0, lock)
{
DEFINE_WAKE_Q(wake_q);
struct mutex_waiter waiter;
@@ -616,7 +677,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
if (!use_ww_ctx) {
/* add waiting tasks to the end of the waitqueue (FIFO): */
- __mutex_add_waiter(lock, &waiter, &lock->wait_list);
+ __mutex_add_waiter(lock, &waiter, NULL);
} else {
/*
* Add in stamp order, waking up waiters that must kill
@@ -627,6 +688,8 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
goto err_early_kill;
}
+ raw_spin_lock(&current->blocked_lock);
+ __set_task_blocked_on(current, lock);
set_current_state(state);
trace_contention_begin(lock, LCB_F_MUTEX);
for (;;) {
@@ -639,8 +702,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
* the handoff.
*/
if (__mutex_trylock(lock))
- goto acquired;
+ break;
+ raw_spin_unlock(&current->blocked_lock);
/*
* Check for signals and kill conditions while holding
* wait_lock. This ensures the lock cancellation is ordered
@@ -661,8 +725,16 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
schedule_preempt_disabled();
- first = __mutex_waiter_is_first(lock, &waiter);
+ first = lock->first_waiter == &waiter;
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ raw_spin_lock(&current->blocked_lock);
+ /*
+ * As we likely have been woken up by task
+ * that has cleared our blocked_on state, re-set
+ * it to the lock we are trying to acquire.
+ */
+ __set_task_blocked_on(current, lock);
set_current_state(state);
/*
* Here we order against unlock; we must either see it change
@@ -673,25 +745,40 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
break;
if (first) {
+ bool opt_acquired;
+
+ /*
+ * mutex_optimistic_spin() can call schedule(), so
+ * we need to release these locks before calling it,
+ * and clear blocked on so we don't become unselectable
+ * to run.
+ */
+ __clear_task_blocked_on(current, lock);
+ raw_spin_unlock(&current->blocked_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
- if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+ opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);
+
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
+ raw_spin_lock(&current->blocked_lock);
+ __set_task_blocked_on(current, lock);
+
+ if (opt_acquired)
break;
trace_contention_begin(lock, LCB_F_MUTEX);
}
-
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
- raw_spin_lock_irqsave(&lock->wait_lock, flags);
-acquired:
+ __clear_task_blocked_on(current, lock);
__set_current_state(TASK_RUNNING);
+ raw_spin_unlock(&current->blocked_lock);
if (ww_ctx) {
/*
* Wound-Wait; we stole the lock (!first_waiter), check the
* waiters as anyone might want to wound us.
*/
- if (!ww_ctx->is_wait_die &&
- !__mutex_waiter_is_first(lock, &waiter))
+ if (!ww_ctx->is_wait_die && lock->first_waiter != &waiter)
__ww_mutex_check_waiters(lock, ww_ctx, &wake_q);
}
@@ -712,9 +799,11 @@ skip_wait:
return 0;
err:
+ clear_task_blocked_on(current, lock);
__set_current_state(TASK_RUNNING);
__mutex_remove_waiter(lock, &waiter);
err_early_kill:
+ WARN_ON(get_task_blocked_on(current));
trace_contention_end(lock, ret);
raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
debug_mutex_free_waiter(&waiter);
@@ -726,6 +815,7 @@ err_early_kill:
static int __sched
__mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip)
+ __cond_acquires(0, lock)
{
return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
}
@@ -733,6 +823,7 @@ __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
static int __sched
__ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
unsigned long ip, struct ww_acquire_ctx *ww_ctx)
+ __cond_acquires(0, lock)
{
return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true);
}
@@ -780,6 +871,7 @@ void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
__mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
+ __acquire(lock);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -788,15 +880,17 @@ void __sched
_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
__mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
+ __acquire(lock);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
int __sched
-mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
+_mutex_lock_killable(struct mutex *lock, unsigned int subclass,
+ struct lockdep_map *nest)
{
- return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
+ return __mutex_lock(lock, TASK_KILLABLE, subclass, nest, _RET_IP_);
}
-EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
+EXPORT_SYMBOL_GPL(_mutex_lock_killable);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
@@ -815,12 +909,14 @@ mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
token = io_schedule_prepare();
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
subclass, NULL, _RET_IP_, NULL, 0);
+ __acquire(lock);
io_schedule_finish(token);
}
EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ __cond_releases(nonzero, lock)
{
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
unsigned tmp;
@@ -882,13 +978,16 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
* Release the lock, slowpath:
*/
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
+ __releases(lock)
{
struct task_struct *next = NULL;
+ struct mutex_waiter *waiter;
DEFINE_WAKE_Q(wake_q);
unsigned long owner;
unsigned long flags;
mutex_release(&lock->dep_map, ip);
+ __release(lock);
/*
* Release the lock before (potentially) taking the spinlock such that
@@ -915,15 +1014,12 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
raw_spin_lock_irqsave(&lock->wait_lock, flags);
debug_mutex_unlock(lock);
- if (!list_empty(&lock->wait_list)) {
- /* get the first entry from the wait-list: */
- struct mutex_waiter *waiter =
- list_first_entry(&lock->wait_list,
- struct mutex_waiter, list);
-
+ waiter = lock->first_waiter;
+ if (waiter) {
next = waiter->task;
debug_mutex_wake_waiter(lock, waiter);
+ set_task_blocked_on_waking(next, lock);
wake_q_add(&wake_q, next);
}
@@ -1013,24 +1109,29 @@ EXPORT_SYMBOL_GPL(mutex_lock_io);
static noinline void __sched
__mutex_lock_slowpath(struct mutex *lock)
+ __acquires(lock)
{
__mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
+ __acquire(lock);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
+ __cond_acquires(0, lock)
{
return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
+ __cond_acquires(0, lock)
{
return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ __cond_acquires(0, lock)
{
return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0,
_RET_IP_, ctx);
@@ -1039,6 +1140,7 @@ __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
+ __cond_acquires(0, lock)
{
return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0,
_RET_IP_, ctx);
@@ -1046,6 +1148,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
#endif
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
/**
* mutex_trylock - try to acquire the mutex, without waiting
* @lock: the mutex to be acquired
@@ -1062,17 +1165,24 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
*/
int __sched mutex_trylock(struct mutex *lock)
{
+ MUTEX_WARN_ON(lock->magic != lock);
+ return __mutex_trylock(lock);
+}
+EXPORT_SYMBOL(mutex_trylock);
+#else
+int __sched _mutex_trylock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock)
+{
bool locked;
MUTEX_WARN_ON(lock->magic != lock);
-
locked = __mutex_trylock(lock);
if (locked)
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_);
return locked;
}
-EXPORT_SYMBOL(mutex_trylock);
+EXPORT_SYMBOL(_mutex_trylock_nest_lock);
+#endif
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index cbff35b9b7ae..3e263e98e5fc 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -6,7 +6,8 @@
*
* Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
*/
-
+#ifndef CONFIG_PREEMPT_RT
+#include <linux/mutex.h>
/*
* This is the control structure for tasks blocked on mutex, which resides
* on the blocked task's kernel stack:
@@ -47,6 +48,12 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
}
+static inline struct mutex *get_task_blocked_on(struct task_struct *p)
+{
+ guard(raw_spinlock_irqsave)(&p->blocked_lock);
+ return __get_task_blocked_on(p);
+}
+
#ifdef CONFIG_DEBUG_MUTEXES
extern void debug_mutex_lock_common(struct mutex *lock,
struct mutex_waiter *waiter);
@@ -59,8 +66,7 @@ extern void debug_mutex_add_waiter(struct mutex *lock,
extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct task_struct *task);
extern void debug_mutex_unlock(struct mutex *lock);
-extern void debug_mutex_init(struct mutex *lock, const char *name,
- struct lock_class_key *key);
+extern void debug_mutex_init(struct mutex *lock);
#else /* CONFIG_DEBUG_MUTEXES */
# define debug_mutex_lock_common(lock, waiter) do { } while (0)
# define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
@@ -68,5 +74,6 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
# define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
# define debug_mutex_remove_waiter(lock, waiter, ti) do { } while (0)
# define debug_mutex_unlock(lock) do { } while (0)
-# define debug_mutex_init(lock, name, key) do { } while (0)
+# define debug_mutex_init(lock) do { } while (0)
#endif /* !CONFIG_DEBUG_MUTEXES */
+#endif /* CONFIG_PREEMPT_RT */
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 6083883c4fe0..ef234469baac 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -138,7 +138,8 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
return !reader; /* wake (readers until) 1 writer */
}
-static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
+static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader,
+ bool freeze)
{
DEFINE_WAIT_FUNC(wq_entry, percpu_rwsem_wake_function);
bool wait;
@@ -156,7 +157,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
spin_unlock_irq(&sem->waiters.lock);
while (wait) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE |
+ (freeze ? TASK_FREEZABLE : 0));
if (!smp_load_acquire(&wq_entry.private))
break;
schedule();
@@ -164,7 +166,8 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
__set_current_state(TASK_RUNNING);
}
-bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
+bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try,
+ bool freeze)
{
if (__percpu_down_read_trylock(sem))
return true;
@@ -174,7 +177,7 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ);
preempt_enable();
- percpu_rwsem_wait(sem, /* .reader = */ true);
+ percpu_rwsem_wait(sem, /* .reader = */ true, freeze);
preempt_disable();
trace_contention_end(sem, 0);
@@ -184,7 +187,7 @@ EXPORT_SYMBOL_GPL(__percpu_down_read);
#define per_cpu_sum(var) \
({ \
- typeof(var) __sum = 0; \
+ TYPEOF_UNQUAL(var) __sum = 0; \
int cpu; \
compiletime_assert_atomic_type(__sum); \
for_each_possible_cpu(cpu) \
@@ -237,7 +240,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
*/
if (!__percpu_down_write_trylock(sem)) {
trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE);
- percpu_rwsem_wait(sem, /* .reader = */ false);
+ percpu_rwsem_wait(sem, /* .reader = */ false, false);
contended = true;
}
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 7d96bed718e4..af8d122bb649 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -25,8 +25,9 @@
#include <trace/events/lock.h>
/*
- * Include queued spinlock statistics code
+ * Include queued spinlock definitions and statistics code
*/
+#include "qspinlock.h"
#include "qspinlock_stat.h"
/*
@@ -67,36 +68,6 @@
*/
#include "mcs_spinlock.h"
-#define MAX_NODES 4
-
-/*
- * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
- * size and four of them will fit nicely in one 64-byte cacheline. For
- * pvqspinlock, however, we need more space for extra data. To accommodate
- * that, we insert two more long words to pad it up to 32 bytes. IOW, only
- * two of them can fit in a cacheline in this case. That is OK as it is rare
- * to have more than 2 levels of slowpath nesting in actual use. We don't
- * want to penalize pvqspinlocks to optimize for a rare case in native
- * qspinlocks.
- */
-struct qnode {
- struct mcs_spinlock mcs;
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
- long reserved[2];
-#endif
-};
-
-/*
- * The pending bit spinning loop count.
- * This heuristic is used to limit the number of lockword accesses
- * made by atomic_cond_read_relaxed when waiting for the lock to
- * transition out of the "== _Q_PENDING_VAL" state. We don't spin
- * indefinitely because there's no guarantee that we'll make forward
- * progress.
- */
-#ifndef _Q_PENDING_LOOPS
-#define _Q_PENDING_LOOPS 1
-#endif
/*
* Per-CPU queue node structures; we can never have more than 4 nested
@@ -106,161 +77,7 @@ struct qnode {
*
* PV doubles the storage and uses the second cacheline for PV state.
*/
-static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
-
-/*
- * We must be able to distinguish between no-tail and the tail at 0:0,
- * therefore increment the cpu number by one.
- */
-
-static inline __pure u32 encode_tail(int cpu, int idx)
-{
- u32 tail;
-
- tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
- tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
-
- return tail;
-}
-
-static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
-{
- int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
- int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
-
- return per_cpu_ptr(&qnodes[idx].mcs, cpu);
-}
-
-static inline __pure
-struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
-{
- return &((struct qnode *)base + idx)->mcs;
-}
-
-#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
-
-#if _Q_PENDING_BITS == 8
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- WRITE_ONCE(lock->pending, 0);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- *
- * Lock stealing is not allowed if this function is used.
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
- WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
-}
-
-/*
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail), which heads an address dependency
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
- /*
- * We can use relaxed semantics since the caller ensures that the
- * MCS node is properly initialized before updating the tail.
- */
- return (u32)xchg_relaxed(&lock->tail,
- tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
-}
-
-#else /* _Q_PENDING_BITS == 8 */
-
-/**
- * clear_pending - clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,* -> *,0,*
- */
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
- atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
-/**
- * clear_pending_set_locked - take ownership and clear the pending bit.
- * @lock: Pointer to queued spinlock structure
- *
- * *,1,0 -> *,0,1
- */
-static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
-{
- atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
-}
-
-/**
- * xchg_tail - Put in the new queue tail code word & retrieve previous one
- * @lock : Pointer to queued spinlock structure
- * @tail : The new queue tail code word
- * Return: The previous queue tail code word
- *
- * xchg(lock, tail)
- *
- * p,*,* -> n,*,* ; prev = xchg(lock, node)
- */
-static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
-{
- u32 old, new;
-
- old = atomic_read(&lock->val);
- do {
- new = (old & _Q_LOCKED_PENDING_MASK) | tail;
- /*
- * We can use relaxed semantics since the caller ensures that
- * the MCS node is properly initialized before updating the
- * tail.
- */
- } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
-
- return old;
-}
-#endif /* _Q_PENDING_BITS == 8 */
-
-/**
- * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
- * @lock : Pointer to queued spinlock structure
- * Return: The previous lock value
- *
- * *,*,* -> *,1,*
- */
-#ifndef queued_fetch_set_pending_acquire
-static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
-{
- return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-}
-#endif
-
-/**
- * set_locked - Set the lock bit and own the lock
- * @lock: Pointer to queued spinlock structure
- *
- * *,*,0 -> *,0,1
- */
-static __always_inline void set_locked(struct qspinlock *lock)
-{
- WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
-}
-
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[_Q_MAX_NODES]);
/*
* Generate the native code for queued_spin_unlock_slowpath(); provide NOPs for
@@ -410,7 +227,7 @@ pv_queue:
* any MCS node. This is not the most elegant solution, but is
* simple enough.
*/
- if (unlikely(idx >= MAX_NODES)) {
+ if (unlikely(idx >= _Q_MAX_NODES)) {
lockevent_inc(lock_no_node);
while (!queued_spin_trylock(lock))
cpu_relax();
@@ -465,7 +282,7 @@ pv_queue:
* head of the waitqueue.
*/
if (old & _Q_TAIL_MASK) {
- prev = decode_tail(old);
+ prev = decode_tail(old, qnodes);
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
diff --git a/kernel/locking/qspinlock.h b/kernel/locking/qspinlock.h
new file mode 100644
index 000000000000..d69958a844f7
--- /dev/null
+++ b/kernel/locking/qspinlock.h
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Queued spinlock defines
+ *
+ * This file contains macro definitions and functions shared between different
+ * qspinlock slow path implementations.
+ */
+#ifndef __LINUX_QSPINLOCK_H
+#define __LINUX_QSPINLOCK_H
+
+#include <asm-generic/percpu.h>
+#include <linux/percpu-defs.h>
+#include <asm-generic/qspinlock.h>
+#include <asm-generic/mcs_spinlock.h>
+
+#define _Q_MAX_NODES 4
+
+/*
+ * The pending bit spinning loop count.
+ * This heuristic is used to limit the number of lockword accesses
+ * made by atomic_cond_read_relaxed when waiting for the lock to
+ * transition out of the "== _Q_PENDING_VAL" state. We don't spin
+ * indefinitely because there's no guarantee that we'll make forward
+ * progress.
+ */
+#ifndef _Q_PENDING_LOOPS
+#define _Q_PENDING_LOOPS 1
+#endif
+
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+ struct mcs_spinlock mcs;
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+ long reserved[2];
+#endif
+};
+
+/*
+ * We must be able to distinguish between no-tail and the tail at 0:0,
+ * therefore increment the cpu number by one.
+ */
+
+static inline __pure u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline __pure struct mcs_spinlock *decode_tail(u32 tail,
+ struct qnode __percpu *qnodes)
+{
+ int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+ return &((struct qnode *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/*
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail), which heads an address dependency
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+ /*
+ * We can use relaxed semantics since the caller ensures that the
+ * MCS node is properly initialized before updating the tail.
+ */
+ return (u32)xchg_relaxed(&lock->tail,
+ tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+ atomic_andnot(_Q_PENDING_VAL, &lock->val);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+ atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+}
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ */
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+ return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
+#endif /* __LINUX_QSPINLOCK_H */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 4a8df1800cbb..daeeeef973e2 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -27,6 +27,7 @@
#include <trace/events/lock.h>
#include "rtmutex_common.h"
+#include "lock_events.h"
#ifndef WW_RT
# define build_ww_mutex() (false)
@@ -93,6 +94,7 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
static __always_inline struct task_struct *
rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
+ __must_hold(&lock->wait_lock)
{
unsigned long val = (unsigned long)owner;
@@ -104,6 +106,7 @@ rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
static __always_inline void
rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+ __must_hold(&lock->wait_lock)
{
/*
* lock->wait_lock is held but explicit acquire semantics are needed
@@ -113,12 +116,14 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
}
static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
/* lock->wait_lock is held so the unlock provides release semantics. */
WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
@@ -126,6 +131,7 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
static __always_inline void
fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
+ __must_hold(&lock->wait_lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -327,6 +333,7 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
}
static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
lock->owner = (struct task_struct *)
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
@@ -1205,6 +1212,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
struct ww_acquire_ctx *ww_ctx,
enum rtmutex_chainwalk chwalk,
struct wake_q_head *wake_q)
+ __must_hold(&lock->wait_lock)
{
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex_waiter *top_waiter = waiter;
@@ -1248,6 +1256,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
/* Check whether the waiter should back out immediately */
rtm = container_of(lock, struct rt_mutex, rtmutex);
+ __assume_ctx_lock(&rtm->rtmutex.wait_lock);
res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q);
if (res) {
raw_spin_lock(&task->pi_lock);
@@ -1355,6 +1364,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
}
static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
int ret = try_to_take_rt_mutex(lock, current, NULL);
@@ -1504,7 +1514,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
* - the VCPU on which owner runs is preempted
*/
if (!owner_on_cpu(owner) || need_resched() ||
- !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
+ !data_race(rt_mutex_waiter_is_top_waiter(lock, waiter))) {
res = false;
break;
}
@@ -1534,20 +1544,27 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
*
* Must be called with lock->wait_lock held and interrupts disabled. It must
* have just failed to try_to_take_rt_mutex().
+ *
+ * When invoked from rt_mutex_start_proxy_lock() waiter::task != current !
*/
static void __sched remove_waiter(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
+ struct task_struct *waiter_task = waiter->task;
struct rt_mutex_base *next_lock;
lockdep_assert_held(&lock->wait_lock);
- raw_spin_lock(&current->pi_lock);
- rt_mutex_dequeue(lock, waiter);
- current->pi_blocked_on = NULL;
- raw_spin_unlock(&current->pi_lock);
+ if (!waiter_task) /* never enqueued */
+ return;
+
+ scoped_guard(raw_spinlock, &waiter_task->pi_lock) {
+ rt_mutex_dequeue(lock, waiter);
+ waiter_task->pi_blocked_on = NULL;
+ }
/*
* Only update priority if the waiter was the highest priority
@@ -1583,7 +1600,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
raw_spin_unlock_irq(&lock->wait_lock);
rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
- next_lock, NULL, current);
+ next_lock, NULL, waiter_task);
raw_spin_lock_irq(&lock->wait_lock);
}
@@ -1612,10 +1629,15 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
struct task_struct *owner;
int ret = 0;
+ __assume_ctx_lock(&rtm->rtmutex.wait_lock);
+
+ lockevent_inc(rtmutex_slow_block);
for (;;) {
/* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock, current, waiter))
+ if (try_to_take_rt_mutex(lock, current, waiter)) {
+ lockevent_inc(rtmutex_slow_acq3);
break;
+ }
if (timeout && !timeout->task) {
ret = -ETIMEDOUT;
@@ -1638,8 +1660,10 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
owner = NULL;
raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);
- if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
+ if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) {
+ lockevent_inc(rtmutex_slow_sleep);
rt_mutex_schedule();
+ }
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
@@ -1652,6 +1676,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
struct rt_mutex_base *lock,
struct rt_mutex_waiter *w)
+ __must_hold(&lock->wait_lock)
{
/*
* If the result is not -EDEADLOCK or the caller requested
@@ -1688,12 +1713,15 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
enum rtmutex_chainwalk chwalk,
struct rt_mutex_waiter *waiter,
struct wake_q_head *wake_q)
+ __must_hold(&lock->wait_lock)
{
struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
struct ww_mutex *ww = ww_container_of(rtm);
int ret;
+ __assume_ctx_lock(&rtm->rtmutex.wait_lock);
lockdep_assert_held(&lock->wait_lock);
+ lockevent_inc(rtmutex_slowlock);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
@@ -1701,6 +1729,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
ww_mutex_lock_acquired(ww, ww_ctx);
}
+ lockevent_inc(rtmutex_slow_acq1);
return 0;
}
@@ -1719,10 +1748,12 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
__ww_mutex_check_waiters(rtm, ww_ctx, wake_q);
ww_mutex_lock_acquired(ww, ww_ctx);
}
+ lockevent_inc(rtmutex_slow_acq2);
} else {
__set_current_state(TASK_RUNNING);
remove_waiter(lock, waiter);
rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
+ lockevent_inc(rtmutex_deadlock);
}
/*
@@ -1740,6 +1771,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
struct ww_acquire_ctx *ww_ctx,
unsigned int state,
struct wake_q_head *wake_q)
+ __must_hold(&lock->wait_lock)
{
struct rt_mutex_waiter waiter;
int ret;
@@ -1751,6 +1783,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
&waiter, wake_q);
debug_rt_mutex_free_waiter(&waiter);
+ lockevent_cond_inc(rtmutex_slow_wake, !wake_q_empty(wake_q));
return ret;
}
@@ -1823,9 +1856,12 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
struct task_struct *owner;
lockdep_assert_held(&lock->wait_lock);
+ lockevent_inc(rtlock_slowlock);
- if (try_to_take_rt_mutex(lock, current, NULL))
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
+ lockevent_inc(rtlock_slow_acq1);
return;
+ }
rt_mutex_init_rtlock_waiter(&waiter);
@@ -1838,8 +1874,10 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
for (;;) {
/* Try to acquire the lock again */
- if (try_to_take_rt_mutex(lock, current, &waiter))
+ if (try_to_take_rt_mutex(lock, current, &waiter)) {
+ lockevent_inc(rtlock_slow_acq2);
break;
+ }
if (&waiter == rt_mutex_top_waiter(lock))
owner = rt_mutex_owner(lock);
@@ -1847,8 +1885,10 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
owner = NULL;
raw_spin_unlock_irq_wake(&lock->wait_lock, wake_q);
- if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
+ if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) {
+ lockevent_inc(rtlock_slow_sleep);
schedule_rtlock();
+ }
raw_spin_lock_irq(&lock->wait_lock);
set_current_state(TASK_RTLOCK_WAIT);
@@ -1865,6 +1905,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock,
debug_rt_mutex_free_waiter(&waiter);
trace_contention_end(lock, 0);
+ lockevent_cond_inc(rtlock_slow_wake, !wake_q_empty(wake_q));
}
static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 191e4720e546..514fce7a4e0a 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -13,6 +13,24 @@
*/
int max_lock_depth = 1024;
+static const struct ctl_table rtmutex_sysctl_table[] = {
+ {
+ .procname = "max_lock_depth",
+ .data = &max_lock_depth,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+};
+
+static int __init init_rtmutex_sysctl(void)
+{
+ register_sysctl_init("kernel", rtmutex_sysctl_table);
+ return 0;
+}
+
+subsys_initcall(init_rtmutex_sysctl);
+
/*
* Debug aware fast / slowpath lock,trylock,unlock
*
@@ -347,7 +365,7 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
raw_spin_lock_irq(&lock->wait_lock);
ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q);
- if (unlikely(ret))
+ if (unlikely(ret < 0))
remove_waiter(lock, waiter);
preempt_disable();
raw_spin_unlock_irq(&lock->wait_lock);
@@ -497,19 +515,18 @@ void rt_mutex_debug_task_free(struct task_struct *task)
#ifdef CONFIG_PREEMPT_RT
/* Mutexes */
-void __mutex_rt_init(struct mutex *mutex, const char *name,
- struct lock_class_key *key)
+static void __mutex_rt_init_generic(struct mutex *mutex)
{
+ rt_mutex_base_init(&mutex->rtmutex);
debug_check_no_locks_freed((void *)mutex, sizeof(*mutex));
- lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
}
-EXPORT_SYMBOL(__mutex_rt_init);
static __always_inline int __mutex_lock_common(struct mutex *lock,
unsigned int state,
unsigned int subclass,
struct lockdep_map *nest_lock,
unsigned long ip)
+ __acquires(lock) __no_context_analysis
{
int ret;
@@ -524,6 +541,13 @@ static __always_inline int __mutex_lock_common(struct mutex *lock,
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void mutex_rt_init_lockdep(struct mutex *mutex, const char *name, struct lock_class_key *key)
+{
+ __mutex_rt_init_generic(mutex);
+ lockdep_init_map_wait(&mutex->dep_map, name, key, 0, LD_WAIT_SLEEP);
+}
+EXPORT_SYMBOL(mutex_rt_init_lockdep);
+
void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
@@ -544,12 +568,12 @@ int __sched mutex_lock_interruptible_nested(struct mutex *lock,
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
-int __sched mutex_lock_killable_nested(struct mutex *lock,
- unsigned int subclass)
+int __sched _mutex_lock_killable(struct mutex *lock, unsigned int subclass,
+ struct lockdep_map *nest_lock)
{
- return __mutex_lock_common(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
+ return __mutex_lock_common(lock, TASK_KILLABLE, subclass, nest_lock, _RET_IP_);
}
-EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
+EXPORT_SYMBOL_GPL(_mutex_lock_killable);
void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
{
@@ -563,8 +587,29 @@ void __sched mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
}
EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
+int __sched _mutex_trylock_nest_lock(struct mutex *lock,
+ struct lockdep_map *nest_lock)
+{
+ int ret;
+
+ if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
+ return 0;
+
+ ret = __rt_mutex_trylock(&lock->rtmutex);
+ if (ret)
+ mutex_acquire_nest(&lock->dep_map, 0, 1, nest_lock, _RET_IP_);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(_mutex_trylock_nest_lock);
#else /* CONFIG_DEBUG_LOCK_ALLOC */
+void mutex_rt_init_generic(struct mutex *mutex)
+{
+ __mutex_rt_init_generic(mutex);
+}
+EXPORT_SYMBOL(mutex_rt_init_generic);
+
void __sched mutex_lock(struct mutex *lock)
{
__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
@@ -591,24 +636,19 @@ void __sched mutex_lock_io(struct mutex *lock)
io_schedule_finish(token);
}
EXPORT_SYMBOL(mutex_lock_io);
-#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
int __sched mutex_trylock(struct mutex *lock)
{
- int ret;
-
if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task()))
return 0;
- ret = __rt_mutex_trylock(&lock->rtmutex);
- if (ret)
- mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
-
- return ret;
+ return __rt_mutex_trylock(&lock->rtmutex);
}
EXPORT_SYMBOL(mutex_trylock);
+#endif /* !CONFIG_DEBUG_LOCK_ALLOC */
void __sched mutex_unlock(struct mutex *lock)
+ __releases(lock) __no_context_analysis
{
mutex_release(&lock->dep_map, _RET_IP_);
__rt_mutex_unlock(&lock->rtmutex);
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 78dd3d8c6554..c38b7bdea7b3 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -79,12 +79,18 @@ struct rt_wake_q_head {
* PI-futex support (proxy locking functions, etc.):
*/
extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
- struct task_struct *proxy_owner);
-extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock);
+ struct task_struct *proxy_owner)
+ __must_hold(&lock->wait_lock);
+
+extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock);
+
extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task,
- struct wake_q_head *);
+ struct wake_q_head *)
+ __must_hold(&lock->wait_lock);
+
extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
@@ -94,8 +100,9 @@ extern int rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter);
-extern int rt_mutex_futex_trylock(struct rt_mutex_base *l);
-extern int __rt_mutex_futex_trylock(struct rt_mutex_base *l);
+extern int rt_mutex_futex_trylock(struct rt_mutex_base *lock);
+extern int __rt_mutex_futex_trylock(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock);
extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock);
extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock,
@@ -109,6 +116,7 @@ extern void rt_mutex_postunlock(struct rt_wake_q_head *wqh);
*/
#ifdef CONFIG_RT_MUTEXES
static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
}
@@ -120,6 +128,7 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
*/
static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
struct rt_mutex_waiter *waiter)
+ __must_hold(&lock->wait_lock)
{
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
@@ -127,6 +136,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
}
static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+ __must_hold(&lock->wait_lock)
{
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
struct rt_mutex_waiter *w = NULL;
@@ -153,15 +163,6 @@ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
pi_tree.entry);
}
-#define RT_MUTEX_HAS_WAITERS 1UL
-
-static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
-{
- unsigned long owner = (unsigned long) READ_ONCE(lock->owner);
-
- return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
/*
* Constants for rt mutex functions which have a selectable deadlock
* detection.
@@ -179,9 +180,10 @@ enum rtmutex_chainwalk {
static inline void __rt_mutex_base_init(struct rt_mutex_base *lock)
{
- raw_spin_lock_init(&lock->wait_lock);
- lock->waiters = RB_ROOT_CACHED;
- lock->owner = NULL;
+ scoped_guard (raw_spinlock_init, &lock->wait_lock) {
+ lock->waiters = RB_ROOT_CACHED;
+ lock->owner = NULL;
+ }
}
/* Debug functions */
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 9f4322c07486..82e078c0665a 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -186,6 +186,7 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
unsigned long flags)
+ __releases(&rwb->rtmutex.wait_lock)
{
struct rt_mutex_base *rtm = &rwb->rtmutex;
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2ddb827e3bea..bf647097369c 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#include <linux/hung_task.h>
#include <trace/events/lock.h>
#ifndef CONFIG_PREEMPT_RT
@@ -71,7 +72,7 @@
#c, atomic_long_read(&(sem)->count), \
(unsigned long) sem->magic, \
atomic_long_read(&(sem)->owner), (long)current, \
- list_empty(&(sem)->wait_list) ? "" : "not ")) \
+ rwsem_is_contended(sem) ? "" : "not ")) \
debug_locks_off(); \
} while (0)
#else
@@ -181,11 +182,11 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
__rwsem_set_reader_owned(sem, current);
}
-#ifdef CONFIG_DEBUG_RWSEMS
+#if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
/*
* Return just the real task structure pointer of the owner
*/
-static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
+struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
return (struct task_struct *)
(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
@@ -194,7 +195,7 @@ static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
/*
* Return true if the rwsem is owned by a reader.
*/
-static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
+bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
/*
* Check the count to see if it is write-locked.
@@ -207,10 +208,10 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
}
/*
- * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
- * is a task pointer in owner of a reader-owned rwsem, it will be the
- * real owner or one of the real owners. The only exception is when the
- * unlock is done by up_read_non_owner().
+ * With CONFIG_DEBUG_RWSEMS or CONFIG_DETECT_HUNG_TASK_BLOCKER configured,
+ * it will make sure that the owner field of a reader-owned rwsem either
+ * points to a real reader-owner(s) or gets cleared. The only exception is
+ * when the unlock is done by up_read_non_owner().
*/
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
@@ -319,9 +320,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
sem->magic = sem;
#endif
atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
- raw_spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
atomic_long_set(&sem->owner, 0L);
+ scoped_guard (raw_spinlock_init, &sem->wait_lock) {
+ sem->first_waiter = NULL;
+ }
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
osq_lock_init(&sem->osq);
#endif
@@ -340,8 +342,6 @@ struct rwsem_waiter {
unsigned long timeout;
bool handoff_set;
};
-#define rwsem_first_waiter(sem) \
- list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
enum rwsem_wake_type {
RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
@@ -364,12 +364,22 @@ enum rwsem_wake_type {
*/
#define MAX_READERS_WAKEUP 0x100
-static inline void
-rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+static inline
+bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+ __must_hold(&sem->wait_lock)
{
- lockdep_assert_held(&sem->wait_lock);
- list_add_tail(&waiter->list, &sem->wait_list);
- /* caller will set RWSEM_FLAG_WAITERS */
+ if (list_empty(&waiter->list)) {
+ sem->first_waiter = NULL;
+ return false;
+ }
+
+ if (sem->first_waiter == waiter) {
+ sem->first_waiter = list_first_entry(&waiter->list,
+ struct rwsem_waiter, list);
+ }
+ list_del(&waiter->list);
+
+ return true;
}
/*
@@ -384,14 +394,24 @@ static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
lockdep_assert_held(&sem->wait_lock);
- list_del(&waiter->list);
- if (likely(!list_empty(&sem->wait_list)))
+ if (__rwsem_del_waiter(sem, waiter))
return true;
-
atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
return false;
}
+static inline
+struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem,
+ const struct rwsem_waiter *waiter)
+ __must_hold(&sem->wait_lock)
+{
+ struct rwsem_waiter *next = list_first_entry(&waiter->list,
+ struct rwsem_waiter, list);
+ if (next == sem->first_waiter)
+ return NULL;
+ return next;
+}
+
/*
* handle the lock release when processes blocked on it that can now run
* - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -410,7 +430,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
enum rwsem_wake_type wake_type,
struct wake_q_head *wake_q)
{
- struct rwsem_waiter *waiter, *tmp;
+ struct rwsem_waiter *waiter, *next;
long oldcount, woken = 0, adjustment = 0;
struct list_head wlist;
@@ -420,7 +440,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* Take a peek at the queue head waiter such that we can determine
* the wakeup(s) to perform.
*/
- waiter = rwsem_first_waiter(sem);
+ waiter = sem->first_waiter;
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY) {
@@ -505,25 +525,28 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* put them into wake_q to be woken up later.
*/
INIT_LIST_HEAD(&wlist);
- list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+ do {
+ next = next_waiter(sem, waiter);
if (waiter->type == RWSEM_WAITING_FOR_WRITE)
continue;
woken++;
list_move_tail(&waiter->list, &wlist);
+ if (sem->first_waiter == waiter)
+ sem->first_waiter = next;
/*
* Limit # of readers that can be woken up per wakeup call.
*/
if (unlikely(woken >= MAX_READERS_WAKEUP))
break;
- }
+ } while ((waiter = next) != NULL);
adjustment = woken * RWSEM_READER_BIAS - adjustment;
lockevent_cond_inc(rwsem_wake_reader, woken);
oldcount = atomic_long_read(&sem->count);
- if (list_empty(&sem->wait_list)) {
+ if (!sem->first_waiter) {
/*
* Combined with list_move_tail() above, this implies
* rwsem_del_waiter().
@@ -544,7 +567,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
atomic_long_add(adjustment, &sem->count);
/* 2nd pass */
- list_for_each_entry_safe(waiter, tmp, &wlist, list) {
+ list_for_each_entry_safe(waiter, next, &wlist, list) {
struct task_struct *tsk;
tsk = waiter->task;
@@ -576,7 +599,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
struct wake_q_head *wake_q)
__releases(&sem->wait_lock)
{
- bool first = rwsem_first_waiter(sem) == waiter;
+ bool first = sem->first_waiter == waiter;
wake_q_init(wake_q);
@@ -601,8 +624,9 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
*/
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
+ __must_hold(&sem->wait_lock)
{
- struct rwsem_waiter *first = rwsem_first_waiter(sem);
+ struct rwsem_waiter *first = sem->first_waiter;
long count, new;
lockdep_assert_held(&sem->wait_lock);
@@ -638,7 +662,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
new |= RWSEM_WRITER_LOCKED;
new &= ~RWSEM_FLAG_HANDOFF;
- if (list_is_singular(&sem->wait_list))
+ if (list_empty(&first->list))
new &= ~RWSEM_FLAG_WAITERS;
}
} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
@@ -658,7 +682,8 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
* Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
* success.
*/
- list_del(&waiter->list);
+ __rwsem_del_waiter(sem, waiter);
+
rwsem_set_owner(sem);
return true;
}
@@ -727,8 +752,6 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
return ret;
}
-#define OWNER_SPINNABLE (OWNER_NULL | OWNER_WRITER | OWNER_READER)
-
static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
@@ -835,7 +858,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
enum owner_state owner_state;
owner_state = rwsem_spin_on_owner(sem);
- if (!(owner_state & OWNER_SPINNABLE))
+ if (owner_state == OWNER_NONSPINNABLE)
break;
/*
@@ -995,7 +1018,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
{
long adjustment = -RWSEM_READER_BIAS;
long rcnt = (count >> RWSEM_READER_SHIFT);
- struct rwsem_waiter waiter;
+ struct rwsem_waiter waiter, *first;
DEFINE_WAKE_Q(wake_q);
/*
@@ -1020,7 +1043,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
*/
if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
raw_spin_lock_irq(&sem->wait_lock);
- if (!list_empty(&sem->wait_list))
+ if (sem->first_waiter)
rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
&wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
@@ -1036,7 +1059,8 @@ queue:
waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
- if (list_empty(&sem->wait_list)) {
+ first = sem->first_waiter;
+ if (!first) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
@@ -1052,8 +1076,11 @@ queue:
return sem;
}
adjustment += RWSEM_FLAG_WAITERS;
+ INIT_LIST_HEAD(&waiter.list);
+ sem->first_waiter = &waiter;
+ } else {
+ list_add_tail(&waiter.list, &first->list);
}
- rwsem_add_waiter(sem, &waiter);
/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
@@ -1065,10 +1092,13 @@ queue:
wake_up_q(&wake_q);
trace_contention_begin(sem, LCB_F_READ);
+ set_current_state(state);
+
+ if (state == TASK_UNINTERRUPTIBLE)
+ hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_READER);
/* wait to be given the lock */
for (;;) {
- set_current_state(state);
if (!smp_load_acquire(&waiter.task)) {
/* Matches rwsem_mark_wake()'s smp_store_release(). */
break;
@@ -1083,8 +1113,12 @@ queue:
}
schedule_preempt_disabled();
lockevent_inc(rwsem_sleep_reader);
+ set_current_state(state);
}
+ if (state == TASK_UNINTERRUPTIBLE)
+ hung_task_clear_blocker();
+
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
trace_contention_end(sem, 0);
@@ -1104,7 +1138,7 @@ out_nolock:
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
- struct rwsem_waiter waiter;
+ struct rwsem_waiter waiter, *first;
DEFINE_WAKE_Q(wake_q);
/* do optimistic spinning and steal lock if possible */
@@ -1123,10 +1157,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
- rwsem_add_waiter(sem, &waiter);
- /* we're now waiting on the lock */
- if (rwsem_first_waiter(sem) != &waiter) {
+ first = sem->first_waiter;
+ if (first) {
+ list_add_tail(&waiter.list, &first->list);
rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
&wake_q);
if (!wake_q_empty(&wake_q)) {
@@ -1139,6 +1173,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
raw_spin_lock_irq(&sem->wait_lock);
}
} else {
+ INIT_LIST_HEAD(&waiter.list);
+ sem->first_waiter = &waiter;
atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
}
@@ -1146,6 +1182,9 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
set_current_state(state);
trace_contention_begin(sem, LCB_F_WRITE);
+ if (state == TASK_UNINTERRUPTIBLE)
+ hung_task_set_blocker(sem, BLOCKER_TYPE_RWSEM_WRITER);
+
for (;;) {
if (rwsem_try_write_lock(sem, &waiter)) {
/* rwsem_try_write_lock() implies ACQUIRE on success */
@@ -1179,6 +1218,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
trylock_again:
raw_spin_lock_irq(&sem->wait_lock);
}
+
+ if (state == TASK_UNINTERRUPTIBLE)
+ hung_task_clear_blocker();
+
__set_current_state(TASK_RUNNING);
raw_spin_unlock_irq(&sem->wait_lock);
lockevent_inc(rwsem_wlock);
@@ -1205,7 +1248,7 @@ static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
- if (!list_empty(&sem->wait_list))
+ if (sem->first_waiter)
rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -1226,7 +1269,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
raw_spin_lock_irqsave(&sem->wait_lock, flags);
- if (!list_empty(&sem->wait_list))
+ if (sem->first_waiter)
rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -1519,6 +1562,7 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
* lock for reading
*/
void __sched down_read(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1528,6 +1572,7 @@ void __sched down_read(struct rw_semaphore *sem)
EXPORT_SYMBOL(down_read);
int __sched down_read_interruptible(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1542,6 +1587,7 @@ int __sched down_read_interruptible(struct rw_semaphore *sem)
EXPORT_SYMBOL(down_read_interruptible);
int __sched down_read_killable(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1559,6 +1605,7 @@ EXPORT_SYMBOL(down_read_killable);
* trylock for reading -- returns 1 if successful, 0 if contention
*/
int down_read_trylock(struct rw_semaphore *sem)
+ __no_context_analysis
{
int ret = __down_read_trylock(sem);
@@ -1572,6 +1619,7 @@ EXPORT_SYMBOL(down_read_trylock);
* lock for writing
*/
void __sched down_write(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1583,6 +1631,7 @@ EXPORT_SYMBOL(down_write);
* lock for writing
*/
int __sched down_write_killable(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
@@ -1601,6 +1650,7 @@ EXPORT_SYMBOL(down_write_killable);
* trylock for writing -- returns 1 if successful, 0 if contention
*/
int down_write_trylock(struct rw_semaphore *sem)
+ __no_context_analysis
{
int ret = __down_write_trylock(sem);
@@ -1615,6 +1665,7 @@ EXPORT_SYMBOL(down_write_trylock);
* release a read lock
*/
void up_read(struct rw_semaphore *sem)
+ __no_context_analysis
{
rwsem_release(&sem->dep_map, _RET_IP_);
__up_read(sem);
@@ -1625,6 +1676,7 @@ EXPORT_SYMBOL(up_read);
* release a write lock
*/
void up_write(struct rw_semaphore *sem)
+ __no_context_analysis
{
rwsem_release(&sem->dep_map, _RET_IP_);
__up_write(sem);
@@ -1635,6 +1687,7 @@ EXPORT_SYMBOL(up_write);
* downgrade write lock to read lock
*/
void downgrade_write(struct rw_semaphore *sem)
+ __no_context_analysis
{
lock_downgrade(&sem->dep_map, _RET_IP_);
__downgrade_write(sem);
@@ -1644,6 +1697,7 @@ EXPORT_SYMBOL(downgrade_write);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void down_read_nested(struct rw_semaphore *sem, int subclass)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1652,6 +1706,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_read_nested);
int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1666,6 +1721,7 @@ int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_read_killable_nested);
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
@@ -1674,6 +1730,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
EXPORT_SYMBOL(_down_write_nest_lock);
void down_read_non_owner(struct rw_semaphore *sem)
+ __no_context_analysis
{
might_sleep();
__down_read(sem);
@@ -1688,6 +1745,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
EXPORT_SYMBOL(down_read_non_owner);
void down_write_nested(struct rw_semaphore *sem, int subclass)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1696,6 +1754,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_write_nested);
int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
+ __no_context_analysis
{
might_sleep();
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
@@ -1711,6 +1770,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_write_killable_nested);
void up_read_non_owner(struct rw_semaphore *sem)
+ __no_context_analysis
{
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
__up_read(sem);
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index de9117c0e671..74d41433ba13 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -21,7 +21,7 @@
* too.
*
* The ->count variable represents how many more tasks can acquire this
- * semaphore. If it's zero, there may be tasks waiting on the wait_list.
+ * semaphore. If it's zero, there may be waiters.
*/
#include <linux/compiler.h>
@@ -34,6 +34,7 @@
#include <linux/spinlock.h>
#include <linux/ftrace.h>
#include <trace/events/lock.h>
+#include <linux/hung_task.h>
static noinline void __down(struct semaphore *sem);
static noinline int __down_interruptible(struct semaphore *sem);
@@ -41,6 +42,41 @@ static noinline int __down_killable(struct semaphore *sem);
static noinline int __down_timeout(struct semaphore *sem, long timeout);
static noinline void __up(struct semaphore *sem, struct wake_q_head *wake_q);
+#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
+static inline void hung_task_sem_set_holder(struct semaphore *sem)
+{
+ WRITE_ONCE((sem)->last_holder, (unsigned long)current);
+}
+
+static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
+{
+ if (READ_ONCE((sem)->last_holder) == (unsigned long)current)
+ WRITE_ONCE((sem)->last_holder, 0UL);
+}
+
+unsigned long sem_last_holder(struct semaphore *sem)
+{
+ return READ_ONCE(sem->last_holder);
+}
+#else
+static inline void hung_task_sem_set_holder(struct semaphore *sem)
+{
+}
+static inline void hung_task_sem_clear_if_holder(struct semaphore *sem)
+{
+}
+unsigned long sem_last_holder(struct semaphore *sem)
+{
+ return 0UL;
+}
+#endif
+
+static inline void __sem_acquire(struct semaphore *sem)
+{
+ sem->count--;
+ hung_task_sem_set_holder(sem);
+}
+
/**
* down - acquire the semaphore
* @sem: the semaphore to be acquired
@@ -59,7 +95,7 @@ void __sched down(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
__down(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -83,7 +119,7 @@ int __sched down_interruptible(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_interruptible(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -110,7 +146,7 @@ int __sched down_killable(struct semaphore *sem)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_killable(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -140,7 +176,7 @@ int __sched down_trylock(struct semaphore *sem)
raw_spin_lock_irqsave(&sem->lock, flags);
count = sem->count - 1;
if (likely(count >= 0))
- sem->count = count;
+ __sem_acquire(sem);
raw_spin_unlock_irqrestore(&sem->lock, flags);
return (count < 0);
@@ -165,7 +201,7 @@ int __sched down_timeout(struct semaphore *sem, long timeout)
might_sleep();
raw_spin_lock_irqsave(&sem->lock, flags);
if (likely(sem->count > 0))
- sem->count--;
+ __sem_acquire(sem);
else
result = __down_timeout(sem, timeout);
raw_spin_unlock_irqrestore(&sem->lock, flags);
@@ -187,7 +223,10 @@ void __sched up(struct semaphore *sem)
DEFINE_WAKE_Q(wake_q);
raw_spin_lock_irqsave(&sem->lock, flags);
- if (likely(list_empty(&sem->wait_list)))
+
+ hung_task_sem_clear_if_holder(sem);
+
+ if (likely(!sem->first_waiter))
sem->count++;
else
__up(sem, &wake_q);
@@ -205,6 +244,21 @@ struct semaphore_waiter {
bool up;
};
+static inline
+void sem_del_waiter(struct semaphore *sem, struct semaphore_waiter *waiter)
+{
+ if (list_empty(&waiter->list)) {
+ sem->first_waiter = NULL;
+ return;
+ }
+
+ if (sem->first_waiter == waiter) {
+ sem->first_waiter = list_first_entry(&waiter->list,
+ struct semaphore_waiter, list);
+ }
+ list_del(&waiter->list);
+}
+
/*
* Because this function is inlined, the 'state' parameter will be
* constant, and thus optimised away by the compiler. Likewise the
@@ -213,9 +267,15 @@ struct semaphore_waiter {
static inline int __sched ___down_common(struct semaphore *sem, long state,
long timeout)
{
- struct semaphore_waiter waiter;
-
- list_add_tail(&waiter.list, &sem->wait_list);
+ struct semaphore_waiter waiter, *first;
+
+ first = sem->first_waiter;
+ if (first) {
+ list_add_tail(&waiter.list, &first->list);
+ } else {
+ INIT_LIST_HEAD(&waiter.list);
+ sem->first_waiter = &waiter;
+ }
waiter.task = current;
waiter.up = false;
@@ -228,16 +288,18 @@ static inline int __sched ___down_common(struct semaphore *sem, long state,
raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->lock);
- if (waiter.up)
+ if (waiter.up) {
+ hung_task_sem_set_holder(sem);
return 0;
+ }
}
timed_out:
- list_del(&waiter.list);
+ sem_del_waiter(sem, &waiter);
return -ETIME;
interrupted:
- list_del(&waiter.list);
+ sem_del_waiter(sem, &waiter);
return -EINTR;
}
@@ -246,10 +308,14 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
{
int ret;
+ hung_task_set_blocker(sem, BLOCKER_TYPE_SEM);
+
trace_contention_begin(sem, 0);
ret = ___down_common(sem, state, timeout);
trace_contention_end(sem, ret);
+ hung_task_clear_blocker();
+
return ret;
}
@@ -276,9 +342,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout)
static noinline void __sched __up(struct semaphore *sem,
struct wake_q_head *wake_q)
{
- struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
- struct semaphore_waiter, list);
- list_del(&waiter->list);
+ struct semaphore_waiter *waiter = sem->first_waiter;
+
+ sem_del_waiter(sem, waiter);
waiter->up = true;
wake_q_add(wake_q, waiter->task);
}
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 7685defd7c52..b42d293da38b 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -64,8 +64,9 @@ EXPORT_PER_CPU_SYMBOL(__mmiowb_state);
* time (making _this_ CPU preemptible if possible), and we also signal
* towards that other CPU that it should break the lock ASAP.
*/
-#define BUILD_LOCK_OPS(op, locktype) \
+#define BUILD_LOCK_OPS(op, locktype, lock_ctx_op) \
static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
+ lock_ctx_op(lock) \
{ \
for (;;) { \
preempt_disable(); \
@@ -78,6 +79,7 @@ static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
} \
\
static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
+ lock_ctx_op(lock) \
{ \
unsigned long flags; \
\
@@ -96,11 +98,13 @@ static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
} \
\
static void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \
+ lock_ctx_op(lock) \
{ \
_raw_##op##_lock_irqsave(lock); \
} \
\
static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
+ lock_ctx_op(lock) \
{ \
unsigned long flags; \
\
@@ -123,11 +127,11 @@ static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \
* __[spin|read|write]_lock_irqsave()
* __[spin|read|write]_lock_bh()
*/
-BUILD_LOCK_OPS(spin, raw_spinlock);
+BUILD_LOCK_OPS(spin, raw_spinlock, __acquires);
#ifndef CONFIG_PREEMPT_RT
-BUILD_LOCK_OPS(read, rwlock);
-BUILD_LOCK_OPS(write, rwlock);
+BUILD_LOCK_OPS(read, rwlock, __acquires_shared);
+BUILD_LOCK_OPS(write, rwlock, __acquires);
#endif
#endif
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 87b03d2e41db..2338b3adfb55 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -184,8 +184,8 @@ void do_raw_read_unlock(rwlock_t *lock)
static inline void debug_write_lock_before(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
- RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
- RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ RWLOCK_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index bcb1b9fea588..838d631544ed 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -13,7 +13,8 @@
#include <linux/slab.h>
#include <linux/ww_mutex.h>
-static DEFINE_WD_CLASS(ww_class);
+static DEFINE_WD_CLASS(wd_class);
+static DEFINE_WW_CLASS(ww_class);
struct workqueue_struct *wq;
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
@@ -54,16 +55,16 @@ static void test_mutex_work(struct work_struct *work)
ww_mutex_unlock(&mtx->mutex);
}
-static int __test_mutex(unsigned int flags)
+static int __test_mutex(struct ww_class *class, unsigned int flags)
{
#define TIMEOUT (HZ / 16)
struct test_mutex mtx;
struct ww_acquire_ctx ctx;
int ret;
- ww_mutex_init(&mtx.mutex, &ww_class);
+ ww_mutex_init(&mtx.mutex, class);
if (flags & TEST_MTX_CTX)
- ww_acquire_init(&ctx, &ww_class);
+ ww_acquire_init(&ctx, class);
INIT_WORK_ONSTACK(&mtx.work, test_mutex_work);
init_completion(&mtx.ready);
@@ -71,7 +72,7 @@ static int __test_mutex(unsigned int flags)
init_completion(&mtx.done);
mtx.flags = flags;
- schedule_work(&mtx.work);
+ queue_work(wq, &mtx.work);
wait_for_completion(&mtx.ready);
ww_mutex_lock(&mtx.mutex, (flags & TEST_MTX_CTX) ? &ctx : NULL);
@@ -106,13 +107,13 @@ static int __test_mutex(unsigned int flags)
#undef TIMEOUT
}
-static int test_mutex(void)
+static int test_mutex(struct ww_class *class)
{
int ret;
int i;
for (i = 0; i < __TEST_MTX_LAST; i++) {
- ret = __test_mutex(i);
+ ret = __test_mutex(class, i);
if (ret)
return ret;
}
@@ -120,15 +121,15 @@ static int test_mutex(void)
return 0;
}
-static int test_aa(bool trylock)
+static int test_aa(struct ww_class *class, bool trylock)
{
struct ww_mutex mutex;
struct ww_acquire_ctx ctx;
int ret;
const char *from = trylock ? "trylock" : "lock";
- ww_mutex_init(&mutex, &ww_class);
- ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_init(&mutex, class);
+ ww_acquire_init(&ctx, class);
if (!trylock) {
ret = ww_mutex_lock(&mutex, &ctx);
@@ -177,6 +178,7 @@ out:
struct test_abba {
struct work_struct work;
+ struct ww_class *class;
struct ww_mutex a_mutex;
struct ww_mutex b_mutex;
struct completion a_ready;
@@ -191,7 +193,7 @@ static void test_abba_work(struct work_struct *work)
struct ww_acquire_ctx ctx;
int err;
- ww_acquire_init_noinject(&ctx, &ww_class);
+ ww_acquire_init_noinject(&ctx, abba->class);
if (!abba->trylock)
ww_mutex_lock(&abba->b_mutex, &ctx);
else
@@ -217,23 +219,24 @@ static void test_abba_work(struct work_struct *work)
abba->result = err;
}
-static int test_abba(bool trylock, bool resolve)
+static int test_abba(struct ww_class *class, bool trylock, bool resolve)
{
struct test_abba abba;
struct ww_acquire_ctx ctx;
int err, ret;
- ww_mutex_init(&abba.a_mutex, &ww_class);
- ww_mutex_init(&abba.b_mutex, &ww_class);
+ ww_mutex_init(&abba.a_mutex, class);
+ ww_mutex_init(&abba.b_mutex, class);
INIT_WORK_ONSTACK(&abba.work, test_abba_work);
init_completion(&abba.a_ready);
init_completion(&abba.b_ready);
+ abba.class = class;
abba.trylock = trylock;
abba.resolve = resolve;
- schedule_work(&abba.work);
+ queue_work(wq, &abba.work);
- ww_acquire_init_noinject(&ctx, &ww_class);
+ ww_acquire_init_noinject(&ctx, class);
if (!trylock)
ww_mutex_lock(&abba.a_mutex, &ctx);
else
@@ -278,6 +281,7 @@ static int test_abba(bool trylock, bool resolve)
struct test_cycle {
struct work_struct work;
+ struct ww_class *class;
struct ww_mutex a_mutex;
struct ww_mutex *b_mutex;
struct completion *a_signal;
@@ -291,7 +295,7 @@ static void test_cycle_work(struct work_struct *work)
struct ww_acquire_ctx ctx;
int err, erra = 0;
- ww_acquire_init_noinject(&ctx, &ww_class);
+ ww_acquire_init_noinject(&ctx, cycle->class);
ww_mutex_lock(&cycle->a_mutex, &ctx);
complete(cycle->a_signal);
@@ -314,20 +318,21 @@ static void test_cycle_work(struct work_struct *work)
cycle->result = err ?: erra;
}
-static int __test_cycle(unsigned int nthreads)
+static int __test_cycle(struct ww_class *class, unsigned int nthreads)
{
struct test_cycle *cycles;
unsigned int n, last = nthreads - 1;
int ret;
- cycles = kmalloc_array(nthreads, sizeof(*cycles), GFP_KERNEL);
+ cycles = kmalloc_objs(*cycles, nthreads);
if (!cycles)
return -ENOMEM;
for (n = 0; n < nthreads; n++) {
struct test_cycle *cycle = &cycles[n];
- ww_mutex_init(&cycle->a_mutex, &ww_class);
+ cycle->class = class;
+ ww_mutex_init(&cycle->a_mutex, class);
if (n == last)
cycle->b_mutex = &cycles[0].a_mutex;
else
@@ -367,13 +372,13 @@ static int __test_cycle(unsigned int nthreads)
return ret;
}
-static int test_cycle(unsigned int ncpus)
+static int test_cycle(struct ww_class *class, unsigned int ncpus)
{
unsigned int n;
int ret;
for (n = 2; n <= ncpus + 1; n++) {
- ret = __test_cycle(n);
+ ret = __test_cycle(class, n);
if (ret)
return ret;
}
@@ -384,6 +389,7 @@ static int test_cycle(unsigned int ncpus)
struct stress {
struct work_struct work;
struct ww_mutex *locks;
+ struct ww_class *class;
unsigned long timeout;
int nlocks;
};
@@ -406,7 +412,7 @@ static int *get_random_order(int count)
int *order;
int n, r;
- order = kmalloc_array(count, sizeof(*order), GFP_KERNEL);
+ order = kmalloc_objs(*order, count);
if (!order)
return order;
@@ -443,7 +449,7 @@ static void stress_inorder_work(struct work_struct *work)
int contended = -1;
int n, err;
- ww_acquire_init(&ctx, &ww_class);
+ ww_acquire_init(&ctx, stress->class);
retry:
err = 0;
for (n = 0; n < nlocks; n++) {
@@ -500,7 +506,7 @@ static void stress_reorder_work(struct work_struct *work)
return;
for (n = 0; n < stress->nlocks; n++) {
- ll = kmalloc(sizeof(*ll), GFP_KERNEL);
+ ll = kmalloc_obj(*ll);
if (!ll)
goto out;
@@ -511,7 +517,7 @@ static void stress_reorder_work(struct work_struct *work)
order = NULL;
do {
- ww_acquire_init(&ctx, &ww_class);
+ ww_acquire_init(&ctx, stress->class);
list_for_each_entry(ll, &locks, link) {
err = ww_mutex_lock(ll->lock, &ctx);
@@ -570,25 +576,24 @@ static void stress_one_work(struct work_struct *work)
#define STRESS_ONE BIT(2)
#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE)
-static int stress(int nlocks, int nthreads, unsigned int flags)
+static int stress(struct ww_class *class, int nlocks, int nthreads, unsigned int flags)
{
struct ww_mutex *locks;
struct stress *stress_array;
int n, count;
- locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL);
+ locks = kmalloc_objs(*locks, nlocks);
if (!locks)
return -ENOMEM;
- stress_array = kmalloc_array(nthreads, sizeof(*stress_array),
- GFP_KERNEL);
+ stress_array = kmalloc_objs(*stress_array, nthreads);
if (!stress_array) {
kfree(locks);
return -ENOMEM;
}
for (n = 0; n < nlocks; n++)
- ww_mutex_init(&locks[n], &ww_class);
+ ww_mutex_init(&locks[n], class);
count = 0;
for (n = 0; nthreads; n++) {
@@ -617,6 +622,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
stress = &stress_array[count++];
INIT_WORK(&stress->work, fn);
+ stress->class = class;
stress->locks = locks;
stress->nlocks = nlocks;
stress->timeout = jiffies + 2*HZ;
@@ -635,59 +641,131 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
return 0;
}
-static int __init test_ww_mutex_init(void)
+static int run_tests(struct ww_class *class)
{
int ncpus = num_online_cpus();
int ret, i;
- printk(KERN_INFO "Beginning ww mutex selftests\n");
-
- prandom_seed_state(&rng, get_random_u64());
-
- wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
- if (!wq)
- return -ENOMEM;
-
- ret = test_mutex();
+ ret = test_mutex(class);
if (ret)
return ret;
- ret = test_aa(false);
+ ret = test_aa(class, false);
if (ret)
return ret;
- ret = test_aa(true);
+ ret = test_aa(class, true);
if (ret)
return ret;
for (i = 0; i < 4; i++) {
- ret = test_abba(i & 1, i & 2);
+ ret = test_abba(class, i & 1, i & 2);
if (ret)
return ret;
}
- ret = test_cycle(ncpus);
+ ret = test_cycle(class, ncpus);
+ if (ret)
+ return ret;
+
+ ret = stress(class, 16, 2 * ncpus, STRESS_INORDER);
+ if (ret)
+ return ret;
+
+ ret = stress(class, 16, 2 * ncpus, STRESS_REORDER);
if (ret)
return ret;
- ret = stress(16, 2*ncpus, STRESS_INORDER);
+ ret = stress(class, 2046, hweight32(STRESS_ALL) * ncpus, STRESS_ALL);
if (ret)
return ret;
- ret = stress(16, 2*ncpus, STRESS_REORDER);
+ return 0;
+}
+
+static int run_test_classes(void)
+{
+ int ret;
+
+ pr_info("Beginning ww (wound) mutex selftests\n");
+
+ ret = run_tests(&ww_class);
if (ret)
return ret;
- ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
+ pr_info("Beginning ww (die) mutex selftests\n");
+ ret = run_tests(&wd_class);
if (ret)
return ret;
- printk(KERN_INFO "All ww mutex selftests passed\n");
+ pr_info("All ww mutex selftests passed\n");
return 0;
}
+static DEFINE_MUTEX(run_lock);
+
+static ssize_t run_tests_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (!mutex_trylock(&run_lock)) {
+ pr_err("Test already running\n");
+ return count;
+ }
+
+ run_test_classes();
+ mutex_unlock(&run_lock);
+
+ return count;
+}
+
+static struct kobj_attribute run_tests_attribute =
+ __ATTR(run_tests, 0664, NULL, run_tests_store);
+
+static struct attribute *attrs[] = {
+ &run_tests_attribute.attr,
+ NULL, /* need to NULL terminate the list of attributes */
+};
+
+static struct attribute_group attr_group = {
+ .attrs = attrs,
+};
+
+static struct kobject *test_ww_mutex_kobj;
+
+static int __init test_ww_mutex_init(void)
+{
+ int ret;
+
+ prandom_seed_state(&rng, get_random_u64());
+
+ wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
+
+ test_ww_mutex_kobj = kobject_create_and_add("test_ww_mutex", kernel_kobj);
+ if (!test_ww_mutex_kobj) {
+ destroy_workqueue(wq);
+ return -ENOMEM;
+ }
+
+ /* Create the files associated with this kobject */
+ ret = sysfs_create_group(test_ww_mutex_kobj, &attr_group);
+ if (ret) {
+ kobject_put(test_ww_mutex_kobj);
+ destroy_workqueue(wq);
+ return ret;
+ }
+
+ mutex_lock(&run_lock);
+ ret = run_test_classes();
+ mutex_unlock(&run_lock);
+
+ return ret;
+}
+
static void __exit test_ww_mutex_exit(void)
{
+ kobject_put(test_ww_mutex_kobj);
destroy_workqueue(wq);
}
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 37f025a096c9..6c12452097e1 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -4,58 +4,83 @@
#define MUTEX mutex
#define MUTEX_WAITER mutex_waiter
+#define WAIT_LOCK wait_lock
+
+/*
+ * +--------+
+ * | first |
+ * +--------+
+ * |
+ * v
+ * +----+ +----+ +----+
+ * | W3 | <-> | W1 | <-> | W2 |
+ * +----+ +----+ +----+
+ * ^ ^
+ * +---------------------+
+ */
static inline struct mutex_waiter *
__ww_waiter_first(struct mutex *lock)
+ __must_hold(&lock->wait_lock)
{
- struct mutex_waiter *w;
-
- w = list_first_entry(&lock->wait_list, struct mutex_waiter, list);
- if (list_entry_is_head(w, &lock->wait_list, list))
- return NULL;
-
- return w;
+ return lock->first_waiter;
}
+/*
+ * for (cur = __ww_waiter_first(); cur; cur = __ww_waiter_next())
+ *
+ * Should iterate like: W1, W2, W3
+ */
static inline struct mutex_waiter *
__ww_waiter_next(struct mutex *lock, struct mutex_waiter *w)
+ __must_hold(&lock->wait_lock)
{
w = list_next_entry(w, list);
- if (list_entry_is_head(w, &lock->wait_list, list))
+ /*
+ * Terminate if the next entry is the first again, that has already
+ * been observed.
+ */
+ if (lock->first_waiter == w)
return NULL;
return w;
}
+/*
+ * for (cur = __ww_waiter_last(); cur; cur = __ww_waiter_prev())
+ *
+ * Should iterate like: W3, W2, W1
+ */
static inline struct mutex_waiter *
__ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w)
+ __must_hold(&lock->wait_lock)
{
- w = list_prev_entry(w, list);
- if (list_entry_is_head(w, &lock->wait_list, list))
+ /*
+ * Terminate at the first entry, the previous entry of first is the
+ * last and that has already been observed.
+ */
+ if (lock->first_waiter == w)
return NULL;
- return w;
+ return list_prev_entry(w, list);
}
static inline struct mutex_waiter *
__ww_waiter_last(struct mutex *lock)
+ __must_hold(&lock->wait_lock)
{
- struct mutex_waiter *w;
-
- w = list_last_entry(&lock->wait_list, struct mutex_waiter, list);
- if (list_entry_is_head(w, &lock->wait_list, list))
- return NULL;
+ struct mutex_waiter *w = lock->first_waiter;
+ if (w)
+ w = list_prev_entry(w, list);
return w;
}
static inline void
__ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos)
+ __must_hold(&lock->wait_lock)
{
- struct list_head *p = &lock->wait_list;
- if (pos)
- p = &pos->list;
- __mutex_add_waiter(lock, waiter, p);
+ __mutex_add_waiter(lock, waiter, pos);
}
static inline struct task_struct *
@@ -71,16 +96,19 @@ __ww_mutex_has_waiters(struct mutex *lock)
}
static inline void lock_wait_lock(struct mutex *lock, unsigned long *flags)
+ __acquires(&lock->wait_lock)
{
raw_spin_lock_irqsave(&lock->wait_lock, *flags);
}
static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags)
+ __releases(&lock->wait_lock)
{
raw_spin_unlock_irqrestore(&lock->wait_lock, *flags);
}
static inline void lockdep_assert_wait_lock_held(struct mutex *lock)
+ __must_hold(&lock->wait_lock)
{
lockdep_assert_held(&lock->wait_lock);
}
@@ -89,9 +117,11 @@ static inline void lockdep_assert_wait_lock_held(struct mutex *lock)
#define MUTEX rt_mutex
#define MUTEX_WAITER rt_mutex_waiter
+#define WAIT_LOCK rtmutex.wait_lock
static inline struct rt_mutex_waiter *
__ww_waiter_first(struct rt_mutex *lock)
+ __must_hold(&lock->rtmutex.wait_lock)
{
struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
if (!n)
@@ -119,6 +149,7 @@ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
static inline struct rt_mutex_waiter *
__ww_waiter_last(struct rt_mutex *lock)
+ __must_hold(&lock->rtmutex.wait_lock)
{
struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
if (!n)
@@ -140,21 +171,25 @@ __ww_mutex_owner(struct rt_mutex *lock)
static inline bool
__ww_mutex_has_waiters(struct rt_mutex *lock)
+ __must_hold(&lock->rtmutex.wait_lock)
{
return rt_mutex_has_waiters(&lock->rtmutex);
}
static inline void lock_wait_lock(struct rt_mutex *lock, unsigned long *flags)
+ __acquires(&lock->rtmutex.wait_lock)
{
raw_spin_lock_irqsave(&lock->rtmutex.wait_lock, *flags);
}
static inline void unlock_wait_lock(struct rt_mutex *lock, unsigned long *flags)
+ __releases(&lock->rtmutex.wait_lock)
{
raw_spin_unlock_irqrestore(&lock->rtmutex.wait_lock, *flags);
}
static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock)
+ __must_hold(&lock->rtmutex.wait_lock)
{
lockdep_assert_held(&lock->rtmutex.wait_lock);
}
@@ -284,6 +319,12 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
#ifndef WW_RT
debug_mutex_wake_waiter(lock, waiter);
#endif
+ /*
+ * When waking up the task to die, be sure to set the
+ * blocked_on to PROXY_WAKING. Otherwise we can see
+ * circular blocked_on relationships that can't resolve.
+ */
+ set_task_blocked_on_waking(waiter->task, lock);
wake_q_add(wake_q, waiter->task);
}
@@ -301,6 +342,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
struct ww_acquire_ctx *ww_ctx,
struct ww_acquire_ctx *hold_ctx,
struct wake_q_head *wake_q)
+ __must_hold(&lock->WAIT_LOCK)
{
struct task_struct *owner = __ww_mutex_owner(lock);
@@ -331,9 +373,19 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
* it's wounded in __ww_mutex_check_kill() or has a
* wakeup pending to re-read the wounded state.
*/
- if (owner != current)
+ if (owner != current) {
+ /*
+ * When waking up the task to wound, be sure to set the
+ * blocked_on to PROXY_WAKING. Otherwise we can see
+ * circular blocked_on relationships that can't resolve.
+ *
+ * NOTE: We pass NULL here instead of lock, because we
+ * are waking the mutex owner, who may be currently
+ * blocked on a different mutex.
+ */
+ set_task_blocked_on_waking(owner, NULL);
wake_q_add(wake_q, owner);
-
+ }
return true;
}
@@ -355,6 +407,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
static void
__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx,
struct wake_q_head *wake_q)
+ __must_hold(&lock->WAIT_LOCK)
{
struct MUTEX_WAITER *cur;
@@ -381,6 +434,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
DEFINE_WAKE_Q(wake_q);
unsigned long flags;
+ bool has_waiters;
ww_mutex_lock_acquired(lock, ctx);
@@ -402,7 +456,8 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
* __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
* and/or !empty list.
*/
- if (likely(!__ww_mutex_has_waiters(&lock->base)))
+ has_waiters = data_race(__ww_mutex_has_waiters(&lock->base));
+ if (likely(!has_waiters))
return;
/*
@@ -448,6 +503,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx)
static inline int
__ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
struct ww_acquire_ctx *ctx)
+ __must_hold(&lock->WAIT_LOCK)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
@@ -498,6 +554,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter,
struct MUTEX *lock,
struct ww_acquire_ctx *ww_ctx,
struct wake_q_head *wake_q)
+ __must_hold(&lock->WAIT_LOCK)
{
struct MUTEX_WAITER *cur, *pos = NULL;
bool is_wait_die;
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
index c7196de838ed..e07fb3b96bc3 100644
--- a/kernel/locking/ww_rt_mutex.c
+++ b/kernel/locking/ww_rt_mutex.c
@@ -90,6 +90,7 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
EXPORT_SYMBOL(ww_mutex_lock_interruptible);
void __sched ww_mutex_unlock(struct ww_mutex *lock)
+ __no_context_analysis
{
struct rt_mutex *rtm = &lock->base;