diff options
Diffstat (limited to 'kernel/locking/qspinlock_paravirt.h')
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 141 |
1 files changed, 119 insertions, 22 deletions
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index aaeeefb791f8..ace60a451b4f 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -41,6 +41,89 @@ struct pv_node { }; /* + * By replacing the regular queued_spin_trylock() with the function below, + * it will be called once when a lock waiter enter the PV slowpath before + * being queued. By allowing one lock stealing attempt here when the pending + * bit is off, it helps to reduce the performance impact of lock waiter + * preemption without the drawback of lock starvation. + */ +#define queued_spin_trylock(l) pv_queued_spin_steal_lock(l) +static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) && + (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0); +} + +/* + * The pending bit is used by the queue head vCPU to indicate that it + * is actively spinning on the lock and no lock stealing is allowed. + */ +#if _Q_PENDING_BITS == 8 +static __always_inline void set_pending(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->pending, 1); +} + +static __always_inline void clear_pending(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + WRITE_ONCE(l->pending, 0); +} + +/* + * The pending bit check in pv_queued_spin_steal_lock() isn't a memory + * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock + * just to be sure that it will get it. + */ +static __always_inline int trylock_clear_pending(struct qspinlock *lock) +{ + struct __qspinlock *l = (void *)lock; + + return !READ_ONCE(l->locked) && + (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL) + == _Q_PENDING_VAL); +} +#else /* _Q_PENDING_BITS == 8 */ +static __always_inline void set_pending(struct qspinlock *lock) +{ + atomic_set_mask(_Q_PENDING_VAL, &lock->val); +} + +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_clear_mask(_Q_PENDING_VAL, &lock->val); +} + +static __always_inline int trylock_clear_pending(struct qspinlock *lock) +{ + int val = atomic_read(&lock->val); + + for (;;) { + int old, new; + + if (val & _Q_LOCKED_MASK) + break; + + /* + * Try to clear pending bit & set locked bit + */ + old = val; + new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL; + val = atomic_cmpxchg(&lock->val, old, new); + + if (val == old) + return 1; + } + return 0; +} +#endif /* _Q_PENDING_BITS == 8 */ + +/* * Include queued spinlock statistics code */ #include "qspinlock_stat.h" @@ -202,8 +285,8 @@ static void pv_wait_node(struct mcs_spinlock *node) /* * If pv_kick_node() changed us to vcpu_hashed, retain that - * value so that pv_wait_head() knows to not also try to hash - * this lock. + * value so that pv_wait_head_or_lock() knows to not also try + * to hash this lock. */ cmpxchg(&pn->state, vcpu_halted, vcpu_running); @@ -227,8 +310,9 @@ static void pv_wait_node(struct mcs_spinlock *node) /* * Called after setting next->locked = 1 when we're the lock owner. * - * Instead of waking the waiters stuck in pv_wait_node() advance their state such - * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle. + * Instead of waking the waiters stuck in pv_wait_node() advance their state + * such that they're waiting in pv_wait_head_or_lock(), this avoids a + * wake/sleep cycle. */ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { @@ -257,10 +341,14 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) } /* - * Wait for l->locked to become clear; halt the vcpu after a short spin. + * Wait for l->locked to become clear and acquire the lock; + * halt the vcpu after a short spin. * __pv_queued_spin_unlock() will wake us. + * + * The current value of the lock will be returned for additional processing. */ -static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) +static u32 +pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; struct __qspinlock *l = (void *)lock; @@ -276,11 +364,18 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) lp = (struct qspinlock **)1; for (;; waitcnt++) { + /* + * Set the pending bit in the active lock spinning loop to + * disable lock stealing before attempting to acquire the lock. + */ + set_pending(lock); for (loop = SPIN_THRESHOLD; loop; loop--) { - if (!READ_ONCE(l->locked)) - return; + if (trylock_clear_pending(lock)) + goto gotlock; cpu_relax(); } + clear_pending(lock); + if (!lp) { /* ONCE */ lp = pv_hash(lock, pn); @@ -296,36 +391,38 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node) * * Matches the smp_rmb() in __pv_queued_spin_unlock(). */ - if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) { + if (xchg(&l->locked, _Q_SLOW_VAL) == 0) { /* - * The lock is free and _Q_SLOW_VAL has never - * been set. Therefore we need to unhash before - * getting the lock. + * The lock was free and now we own the lock. + * Change the lock value back to _Q_LOCKED_VAL + * and unhash the table. */ + WRITE_ONCE(l->locked, _Q_LOCKED_VAL); WRITE_ONCE(*lp, NULL); - return; + goto gotlock; } } qstat_inc(qstat_pv_wait_head, true); qstat_inc(qstat_pv_wait_again, waitcnt); pv_wait(&l->locked, _Q_SLOW_VAL); - if (!READ_ONCE(l->locked)) - return; /* * The unlocker should have freed the lock before kicking the * CPU. So if the lock is still not free, it is a spurious - * wakeup and so the vCPU should wait again after spinning for - * a while. + * wakeup or another vCPU has stolen the lock. The current + * vCPU should spin again. */ - qstat_inc(qstat_pv_spurious_wakeup, true); + qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked)); } /* - * Lock is unlocked now; the caller will acquire it without waiting. - * As with pv_wait_node() we rely on the caller to do a load-acquire - * for us. + * The cmpxchg() or xchg() call before coming here provides the + * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL + * here is to indicate to the compiler that the value will always + * be nozero to enable better code optimization. */ +gotlock: + return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL); } /* @@ -350,7 +447,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) * so we need a barrier to order the read of the node data in * pv_unhash *after* we've read the lock being _Q_SLOW_VAL. * - * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL. + * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL. */ smp_rmb(); |