From 64d816cba06c67eeee455b8c78ebcda349d49c24 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 9 Nov 2015 19:09:21 -0500 Subject: locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg() This patch replaces the cmpxchg() and xchg() calls in the native qspinlock code with the more relaxed _acquire or _release versions of those calls to enable other architectures to adopt queued spinlocks with less memory barrier performance overhead. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Douglas Hatch Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1447114167-47185-2-git-send-email-Waiman.Long@hpe.com Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index e2aadbc7151f..39e1cb201b8e 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -12,8 +12,9 @@ * GNU General Public License for more details. * * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P. + * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP * - * Authors: Waiman Long + * Authors: Waiman Long */ #ifndef __ASM_GENERIC_QSPINLOCK_H #define __ASM_GENERIC_QSPINLOCK_H @@ -62,7 +63,7 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock) static __always_inline int queued_spin_trylock(struct qspinlock *lock) { if (!atomic_read(&lock->val) && - (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) == 0)) + (atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0)) return 1; return 0; } @@ -77,7 +78,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) { u32 val; - val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL); + val = atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL); if (likely(val == 0)) return; queued_spin_lock_slowpath(lock, val); @@ -93,7 +94,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock) /* * smp_mb__before_atomic() in order to guarantee release semantics */ - smp_mb__before_atomic_dec(); + smp_mb__before_atomic(); atomic_sub(_Q_LOCKED_VAL, &lock->val); } #endif -- cgit v1.2.3 From b3e0b1b6d841a4b2f64fc09ea728913da8218424 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Oct 2015 14:39:38 +0200 Subject: locking, sched: Introduce smp_cond_acquire() and use it Introduce smp_cond_acquire() which combines a control dependency and a read barrier to form acquire semantics. This primitive has two benefits: - it documents control dependencies, - its typically cheaper than using smp_load_acquire() in a loop. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 17 +++++++++++++++++ kernel/locking/qspinlock.c | 3 +-- kernel/sched/core.c | 8 +------- kernel/sched/sched.h | 2 +- 4 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 4dac1036594f..00b042c49ccd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -299,6 +299,23 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) +/** + * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but employs + * the control dependency of the wait to reduce the barrier on many platforms. + * + * The control dependency provides a LOAD->STORE order, the additional RMB + * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order, + * aka. ACQUIRE. + */ +#define smp_cond_acquire(cond) do { \ + while (!(cond)) \ + cpu_relax(); \ + smp_rmb(); /* ctrl + rmb := acquire */ \ +} while (0) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 986207887def..ed9d96708f93 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -433,8 +433,7 @@ queue: * */ pv_wait_head(lock, node); - while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK) - cpu_relax(); + smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)); /* * claim the lock: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7063c6a07440..9f7862da2cd1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1968,19 +1968,13 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) /* * If the owning (remote) cpu is still in the middle of schedule() with * this task as prev, wait until its done referencing the task. - */ - while (p->on_cpu) - cpu_relax(); - /* - * Combined with the control dependency above, we have an effective - * smp_load_acquire() without the need for full barriers. * * Pairs with the smp_store_release() in finish_lock_switch(). * * This ensures that tasks getting woken will be fully ordered against * their previous state and preserve Program Order. */ - smp_rmb(); + smp_cond_acquire(!p->on_cpu); p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b242775bf670..1e0bb4afe3fd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1076,7 +1076,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * In particular, the load of prev->state in finish_task_switch() must * happen before this. * - * Pairs with the control dependency and rmb in try_to_wake_up(). + * Pairs with the smp_cond_acquire() in try_to_wake_up(). */ smp_store_release(&prev->on_cpu, 0); #endif -- cgit v1.2.3 From d5a73cadf3fdec95e9518ee5bb91bd0747c42b30 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 27 Oct 2015 12:53:49 -0700 Subject: lcoking/barriers, arch: Use smp barriers in smp_store_release() With commit b92b8b35a2e ("locking/arch: Rename set_mb() to smp_store_mb()") it was made clear that the context of this call (and thus set_mb) is strictly for CPU ordering, as opposed to IO. As such all archs should use the smp variant of mb(), respecting the semantics and saving a mandatory barrier on UP. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: dave@stgolabs.net Link: http://lkml.kernel.org/r/1445975631-17047-3-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/barrier.h | 2 +- arch/powerpc/include/asm/barrier.h | 2 +- arch/s390/include/asm/barrier.h | 2 +- include/asm-generic/barrier.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h index df896a1c41d3..209c4b817c95 100644 --- a/arch/ia64/include/asm/barrier.h +++ b/arch/ia64/include/asm/barrier.h @@ -77,7 +77,7 @@ do { \ ___p1; \ }) -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) /* * The group barrier in front of the rsm & ssm are necessary to ensure diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index 0eca6efc0631..a7af5fb7b914 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -34,7 +34,7 @@ #define rmb() __asm__ __volatile__ ("sync" : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #ifdef __SUBARCH_HAS_LWSYNC # define SMPWMB LWSYNC diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index d68e11e0df5e..7ffd0b19135c 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -36,7 +36,7 @@ #define smp_mb__before_atomic() smp_mb() #define smp_mb__after_atomic() smp_mb() -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define smp_store_release(p, v) \ do { \ diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index b42afada1280..0f45f93ef692 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -93,7 +93,7 @@ #endif /* CONFIG_SMP */ #ifndef smp_store_mb -#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #endif #ifndef smp_mb__before_atomic -- cgit v1.2.3