From 890658b7ab48d1362a0362df842cecc73c83146f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Aug 2016 13:36:04 +0200 Subject: locking/mutex: Kill arch specific code Its all generic atomic_long_t stuff now. Tested-by: Jason Low Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/mutex.h | 9 --- arch/arc/include/asm/mutex.h | 18 ----- arch/arm/include/asm/mutex.h | 21 ------ arch/arm64/include/asm/Kbuild | 1 - arch/avr32/include/asm/mutex.h | 9 --- arch/blackfin/include/asm/Kbuild | 1 - arch/c6x/include/asm/mutex.h | 6 -- arch/cris/include/asm/mutex.h | 9 --- arch/frv/include/asm/mutex.h | 9 --- arch/h8300/include/asm/mutex.h | 9 --- arch/hexagon/include/asm/mutex.h | 8 --- arch/ia64/include/asm/mutex.h | 90 ------------------------ arch/m32r/include/asm/mutex.h | 9 --- arch/m68k/include/asm/Kbuild | 1 - arch/metag/include/asm/Kbuild | 1 - arch/microblaze/include/asm/mutex.h | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mn10300/include/asm/mutex.h | 16 ----- arch/nios2/include/asm/mutex.h | 1 - arch/openrisc/include/asm/mutex.h | 27 -------- arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/mutex.h | 132 ------------------------------------ arch/s390/include/asm/mutex.h | 9 --- arch/score/include/asm/mutex.h | 6 -- arch/sh/include/asm/mutex-llsc.h | 109 ----------------------------- arch/sh/include/asm/mutex.h | 12 ---- arch/sparc/include/asm/Kbuild | 1 - arch/tile/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/unicore32/include/asm/mutex.h | 20 ------ arch/x86/include/asm/mutex.h | 5 -- arch/x86/include/asm/mutex_32.h | 110 ------------------------------ arch/x86/include/asm/mutex_64.h | 127 ---------------------------------- arch/xtensa/include/asm/mutex.h | 9 --- 34 files changed, 790 deletions(-) delete mode 100644 arch/alpha/include/asm/mutex.h delete mode 100644 arch/arc/include/asm/mutex.h delete mode 100644 arch/arm/include/asm/mutex.h delete mode 100644 arch/avr32/include/asm/mutex.h delete mode 100644 arch/c6x/include/asm/mutex.h delete mode 100644 arch/cris/include/asm/mutex.h delete mode 100644 arch/frv/include/asm/mutex.h delete mode 100644 arch/h8300/include/asm/mutex.h delete mode 100644 arch/hexagon/include/asm/mutex.h delete mode 100644 arch/ia64/include/asm/mutex.h delete mode 100644 arch/m32r/include/asm/mutex.h delete mode 100644 arch/microblaze/include/asm/mutex.h delete mode 100644 arch/mn10300/include/asm/mutex.h delete mode 100644 arch/nios2/include/asm/mutex.h delete mode 100644 arch/openrisc/include/asm/mutex.h delete mode 100644 arch/powerpc/include/asm/mutex.h delete mode 100644 arch/s390/include/asm/mutex.h delete mode 100644 arch/score/include/asm/mutex.h delete mode 100644 arch/sh/include/asm/mutex-llsc.h delete mode 100644 arch/sh/include/asm/mutex.h delete mode 100644 arch/unicore32/include/asm/mutex.h delete mode 100644 arch/x86/include/asm/mutex.h delete mode 100644 arch/x86/include/asm/mutex_32.h delete mode 100644 arch/x86/include/asm/mutex_64.h delete mode 100644 arch/xtensa/include/asm/mutex.h (limited to 'arch') diff --git a/arch/alpha/include/asm/mutex.h b/arch/alpha/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/alpha/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/arc/include/asm/mutex.h b/arch/arc/include/asm/mutex.h deleted file mode 100644 index a2f88ff9f506..000000000000 --- a/arch/arc/include/asm/mutex.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * xchg() based mutex fast path maintains a state of 0 or 1, as opposed to - * atomic dec based which can "count" any number of lock contenders. - * This ideally needs to be fixed in core, but for now switching to dec ver. - */ -#if defined(CONFIG_SMP) && (CONFIG_NR_CPUS > 2) -#include -#else -#include -#endif diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h deleted file mode 100644 index 87c044910fe0..000000000000 --- a/arch/arm/include/asm/mutex.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * arch/arm/include/asm/mutex.h - * - * ARM optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H -/* - * On pre-ARMv6 hardware this results in a swp-based implementation, - * which is the most efficient. For ARMv6+, we have exclusive memory - * accessors and use atomic_dec to avoid the extra xchg operations - * on the locking slowpaths. - */ -#if __LINUX_ARM_ARCH__ < 6 -#include -#else -#include -#endif -#endif /* _ASM_MUTEX_H */ diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 44e1d7f10add..b4ab238a59ec 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h -generic-y += mutex.h generic-y += poll.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/avr32/include/asm/mutex.h b/arch/avr32/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/avr32/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 91d49c0a3118..2fb67b59d188 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -24,7 +24,6 @@ generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += pgalloc.h diff --git a/arch/c6x/include/asm/mutex.h b/arch/c6x/include/asm/mutex.h deleted file mode 100644 index 7a7248e0462d..000000000000 --- a/arch/c6x/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_C6X_MUTEX_H -#define _ASM_C6X_MUTEX_H - -#include - -#endif /* _ASM_C6X_MUTEX_H */ diff --git a/arch/cris/include/asm/mutex.h b/arch/cris/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/cris/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/frv/include/asm/mutex.h b/arch/frv/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/frv/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/h8300/include/asm/mutex.h b/arch/h8300/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/h8300/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/hexagon/include/asm/mutex.h b/arch/hexagon/include/asm/mutex.h deleted file mode 100644 index 58b52de1bc22..000000000000 --- a/arch/hexagon/include/asm/mutex.h +++ /dev/null @@ -1,8 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h deleted file mode 100644 index 28cb819e0ff9..000000000000 --- a/arch/ia64/include/asm/mutex.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * ia64 implementation of the mutex fastpath. - * - * Copyright (C) 2006 Ken Chen - * - */ - -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(ia64_fetchadd4_acq(count, -1) != 1)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, then the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int ret = ia64_fetchadd4_rel(count, 1); - if (unlikely(ret < 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - * - * If the architecture has no effective trylock variant, it should call the - * spinlock-based trylock variant unconditionally. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) - return 1; - return 0; -} - -#endif diff --git a/arch/m32r/include/asm/mutex.h b/arch/m32r/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/m32r/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index eb85bd9c6180..1f2e5d31cb24 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += mman.h -generic-y += mutex.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 29acb89daaaa..167150c701d1 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -27,7 +27,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/microblaze/include/asm/mutex.h b/arch/microblaze/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/microblaze/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 9740066cc631..3269b742a75e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -9,7 +9,6 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/mn10300/include/asm/mutex.h b/arch/mn10300/include/asm/mutex.h deleted file mode 100644 index 84f5490c6fb4..000000000000 --- a/arch/mn10300/include/asm/mutex.h +++ /dev/null @@ -1,16 +0,0 @@ -/* MN10300 Mutex fastpath - * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - * - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#include diff --git a/arch/nios2/include/asm/mutex.h b/arch/nios2/include/asm/mutex.h deleted file mode 100644 index ff6101aa2c71..000000000000 --- a/arch/nios2/include/asm/mutex.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/openrisc/include/asm/mutex.h b/arch/openrisc/include/asm/mutex.h deleted file mode 100644 index b85a0cfa9fc9..000000000000 --- a/arch/openrisc/include/asm/mutex.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * OpenRISC Linux - * - * Linux architectural port borrowing liberally from similar works of - * others. All original copyrights apply as per the original source - * declaration. - * - * OpenRISC implementation: - * Copyright (C) 2003 Matjaz Breskvar - * Copyright (C) 2010-2011 Jonas Bonn - * et al. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index f9b3a81aefcd..91f53c07f410 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -16,7 +16,6 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += poll.h diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h deleted file mode 100644 index 078155fa1189..000000000000 --- a/arch/powerpc/include/asm/mutex.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm - */ -#ifndef _ASM_POWERPC_MUTEX_H -#define _ASM_POWERPC_MUTEX_H - -static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new) -{ - int t; - - __asm__ __volatile__ ( -"1: lwarx %0,0,%1 # mutex trylock\n\ - cmpw 0,%0,%2\n\ - bne- 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %3,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - "\n\ -2:" - : "=&r" (t) - : "r" (&v->counter), "r" (old), "r" (new) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_dec_return_lock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%1 # mutex lock\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 1b" - PPC_ACQUIRE_BARRIER - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static inline int __mutex_inc_return_unlock(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - PPC_RELEASE_BARRIER -"1: lwarx %0,0,%1 # mutex unlock\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if - * it wasn't 1 originally. This function MUST leave the value lower than - * 1 even when the "1" assertion wasn't true. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - fail_fn(count); -} - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(__mutex_dec_return_lock(count) < 0)) - return -1; - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the count from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * Try to promote the count from 0 to 1. If it wasn't 0, call . - * In the failure case, this function is allowed to either set the value to - * 1, or to set it to a value lower than 1. - */ -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - if (unlikely(__mutex_inc_return_unlock(count) <= 0)) - fail_fn(count); -} - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0, and return 1 (success), or if the count - * was not 1, then return 0 (failure). - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) - return 1; - return 0; -} - -#endif diff --git a/arch/s390/include/asm/mutex.h b/arch/s390/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/s390/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include diff --git a/arch/score/include/asm/mutex.h b/arch/score/include/asm/mutex.h deleted file mode 100644 index 10d48fe4db97..000000000000 --- a/arch/score/include/asm/mutex.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_MUTEX_H -#define _ASM_SCORE_MUTEX_H - -#include - -#endif /* _ASM_SCORE_MUTEX_H */ diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h deleted file mode 100644 index dad29b687bd3..000000000000 --- a/arch/sh/include/asm/mutex-llsc.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * arch/sh/include/asm/mutex-llsc.h - * - * SH-4A optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __ASM_SH_MUTEX_LLSC_H -#define __ASM_SH_MUTEX_LLSC_H - -/* - * Attempting to lock a mutex on SH4A is done like in ARMv6+ architecure. - * with a bastardized atomic decrement (it is not a reliable atomic decrement - * but it satisfies the defined semantics for our purpose, while being - * smaller and faster than a real atomic decrement or atomic swap. - * The idea is to attempt decrementing the lock value only once. If once - * decremented it isn't zero, or if its store-back fails due to a dispute - * on the exclusive store, we simply bail out immediately through the slow - * path where the lock will be reattempted until it succeeds. - */ -static inline void -__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - fail_fn(count); -} - -static inline int -__mutex_fastpath_lock_retval(atomic_t *count) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n" - "add #-1, %0 \n" - "movco.l %0, @%2 \n" - "movt %1 \n" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res != 0)) - __res = -1; - - return __res; -} - -static inline void -__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) -{ - int __done, __res; - - __asm__ __volatile__ ( - "movli.l @%2, %0 \n\t" - "add #1, %0 \n\t" - "movco.l %0, @%2 \n\t" - "movt %1 \n\t" - : "=&z" (__res), "=&r" (__done) - : "r" (&(count)->counter) - : "t"); - - if (unlikely(!__done || __res <= 0)) - fail_fn(count); -} - -/* - * If the unlock was done on a contended lock, or if the unlock simply fails - * then the mutex remains locked. - */ -#define __mutex_slowpath_needs_to_unlock() 1 - -/* - * For __mutex_fastpath_trylock we do an atomic decrement and check the - * result and put it in the __res variable. - */ -static inline int -__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) -{ - int __res, __orig; - - __asm__ __volatile__ ( - "1: movli.l @%2, %0 \n\t" - "dt %0 \n\t" - "movco.l %0,@%2 \n\t" - "bf 1b \n\t" - "cmp/eq #0,%0 \n\t" - "bt 2f \n\t" - "mov #0, %1 \n\t" - "bf 3f \n\t" - "2: mov #1, %1 \n\t" - "3: " - : "=&z" (__orig), "=&r" (__res) - : "r" (&count->counter) - : "t"); - - return __res; -} -#endif /* __ASM_SH_MUTEX_LLSC_H */ diff --git a/arch/sh/include/asm/mutex.h b/arch/sh/include/asm/mutex.h deleted file mode 100644 index d8e37716a4a0..000000000000 --- a/arch/sh/include/asm/mutex.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ -#if defined(CONFIG_CPU_SH4A) -#include -#else -#include -#endif diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cfc918067f80..0569bfac4afb 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -15,7 +15,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h -generic-y += mutex.h generic-y += preempt.h generic-y += rwsem.h generic-y += serial.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index ba35c41c71ff..2d1f5638974c 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -21,7 +21,6 @@ generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += msgbuf.h -generic-y += mutex.h generic-y += param.h generic-y += parport.h generic-y += poll.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 904f3ebf4220..052f7f6d0551 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -17,7 +17,6 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += mutex.h generic-y += param.h generic-y += pci.h generic-y += percpu.h diff --git a/arch/unicore32/include/asm/mutex.h b/arch/unicore32/include/asm/mutex.h deleted file mode 100644 index fab7d0e8adf6..000000000000 --- a/arch/unicore32/include/asm/mutex.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * linux/arch/unicore32/include/asm/mutex.h - * - * Code specific to PKUnity SoC and UniCore ISA - * - * Copyright (C) 2001-2010 GUAN Xue-tao - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * UniCore optimized mutex locking primitives - * - * Please look into asm-generic/mutex-xchg.h for a formal definition. - */ -#ifndef __UNICORE_MUTEX_H__ -#define __UNICORE_MUTEX_H__ - -# include -#endif diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h deleted file mode 100644 index 7d3a48275394..000000000000 --- a/arch/x86/include/asm/mutex.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h deleted file mode 100644 index e9355a84fc67..000000000000 --- a/arch/x86/include/asm/mutex_32.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_32_H -#define _ASM_X86_MUTEX_32_H - -#include - -/** - * __mutex_fastpath_lock - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * @fn: function to call if the original value was not 1 - * - * Change the count from 1 to a value lower than 1, and call if it - * wasn't 1 originally. This function MUST leave the value lower than 1 - * even when the "1" assertion wasn't true. - */ -#define __mutex_fastpath_lock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%eax)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1 - * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 0 - * - * try to promote the mutex from 0 to 1. if it wasn't 0, call . - * In the failure case, this function is allowed to either set the value - * to 1, or to set it to a value lower than 1. - * - * If the implementation sets it to a value of lower than 1, the - * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs - * to return 0 otherwise. - */ -#define __mutex_fastpath_unlock(count, fail_fn) \ -do { \ - unsigned int dummy; \ - \ - typecheck(atomic_t *, count); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%eax)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:\n" \ - : "=a" (dummy) \ - : "a" (count) \ - : "memory", "ecx", "edx"); \ -} while (0) - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to a value lower than 1, and return 0 (failure) - * if it wasn't 1 originally, or return 1 (success) otherwise. This function - * MUST leave the value lower than 1 even when the "1" assertion wasn't true. - * Additionally, if the value was < 0 originally, this function must not leave - * it to 0 on failure. - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_32_H */ diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h deleted file mode 100644 index d9850758464e..000000000000 --- a/arch/x86/include/asm/mutex_64.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Assembly implementation of the mutex fastpath, based on atomic - * decrement/increment. - * - * started by Ingo Molnar: - * - * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar - */ -#ifndef _ASM_X86_MUTEX_64_H -#define _ASM_X86_MUTEX_64_H - -/** - * __mutex_fastpath_lock - decrement and call function if negative - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is negative - * - * Atomically decrements @v and calls if the result is negative. - */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_lock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " decl %0\n" - " jns %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_lock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \ - " jns 1f \n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -/** - * __mutex_fastpath_lock_retval - try to take the lock by moving the count - * from 1 to a 0 value - * @count: pointer of type atomic_t - * - * Change the count from 1 to a value lower than 1. This function returns 0 - * if the fastpath succeeds, or -1 otherwise. - */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count) -{ - if (unlikely(atomic_dec_return(count) < 0)) - return -1; - else - return 0; -} - -/** - * __mutex_fastpath_unlock - increment and call function if nonpositive - * @v: pointer of type atomic_t - * @fail_fn: function to call if the result is nonpositive - * - * Atomically increments @v and calls if the result is nonpositive. - */ -#ifdef CC_HAVE_ASM_GOTO -static inline void __mutex_fastpath_unlock(atomic_t *v, - void (*fail_fn)(atomic_t *)) -{ - asm_volatile_goto(LOCK_PREFIX " incl %0\n" - " jg %l[exit]\n" - : : "m" (v->counter) - : "memory", "cc" - : exit); - fail_fn(v); -exit: - return; -} -#else -#define __mutex_fastpath_unlock(v, fail_fn) \ -do { \ - unsigned long dummy; \ - \ - typecheck(atomic_t *, v); \ - typecheck_fn(void (*)(atomic_t *), fail_fn); \ - \ - asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \ - " jg 1f\n" \ - " call " #fail_fn "\n" \ - "1:" \ - : "=D" (dummy) \ - : "D" (v) \ - : "rax", "rsi", "rdx", "rcx", \ - "r8", "r9", "r10", "r11", "memory"); \ -} while (0) -#endif - -#define __mutex_slowpath_needs_to_unlock() 1 - -/** - * __mutex_fastpath_trylock - try to acquire the mutex, without waiting - * - * @count: pointer of type atomic_t - * @fail_fn: fallback function - * - * Change the count from 1 to 0 and return 1 (success), or return 0 (failure) - * if it wasn't 1 originally. [the fallback function is never used on - * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.] - */ -static inline int __mutex_fastpath_trylock(atomic_t *count, - int (*fail_fn)(atomic_t *)) -{ - if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) - return 1; - - return 0; -} - -#endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/xtensa/include/asm/mutex.h b/arch/xtensa/include/asm/mutex.h deleted file mode 100644 index 458c1f7fbc18..000000000000 --- a/arch/xtensa/include/asm/mutex.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * Pull in the generic implementation for the mutex fastpath. - * - * TODO: implement optimized primitives instead, or leave the generic - * implementation in place, or pick the atomic_xchg() based generic - * implementation. (see asm-generic/mutex-xchg.h for details) - */ - -#include -- cgit v1.2.3 From 79ab11cdb90d8536817ab7357ecb6b1ff76be26c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Oct 2016 11:03:11 +0200 Subject: locking/core: Introduce cpu_relax_yield() For spinning loops people do often use barrier() or cpu_relax(). For most architectures cpu_relax and barrier are the same, but on some architectures cpu_relax can add some latency. For example on power,sparc64 and arc, cpu_relax can shift the CPU towards other hardware threads in an SMT environment. On s390 cpu_relax does even more, it uses an hypercall to the hypervisor to give up the timeslice. In contrast to the SMT yielding this can result in larger latencies. In some places this latency is unwanted, so another variant "cpu_relax_lowlatency" was introduced. Before this is used in more and more places, lets revert the logic and provide a cpu_relax_yield that can be called in places where yielding is more important than latency. By default this is the same as cpu_relax on all architectures. Signed-off-by: Christian Borntraeger Signed-off-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Nicholas Piggin Cc: Noam Camus Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linuxppc-dev@lists.ozlabs.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1477386195-32736-2-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/processor.h | 1 + arch/arc/include/asm/processor.h | 2 ++ arch/arm/include/asm/processor.h | 1 + arch/arm64/include/asm/processor.h | 1 + arch/avr32/include/asm/processor.h | 1 + arch/blackfin/include/asm/processor.h | 1 + arch/c6x/include/asm/processor.h | 1 + arch/cris/include/asm/processor.h | 1 + arch/frv/include/asm/processor.h | 1 + arch/h8300/include/asm/processor.h | 1 + arch/hexagon/include/asm/processor.h | 1 + arch/ia64/include/asm/processor.h | 1 + arch/m32r/include/asm/processor.h | 1 + arch/m68k/include/asm/processor.h | 1 + arch/metag/include/asm/processor.h | 1 + arch/microblaze/include/asm/processor.h | 1 + arch/mips/include/asm/processor.h | 1 + arch/mn10300/include/asm/processor.h | 1 + arch/nios2/include/asm/processor.h | 1 + arch/openrisc/include/asm/processor.h | 1 + arch/parisc/include/asm/processor.h | 1 + arch/powerpc/include/asm/processor.h | 1 + arch/s390/include/asm/processor.h | 3 ++- arch/s390/kernel/processor.c | 4 ++-- arch/score/include/asm/processor.h | 1 + arch/sh/include/asm/processor.h | 1 + arch/sparc/include/asm/processor_32.h | 1 + arch/sparc/include/asm/processor_64.h | 1 + arch/tile/include/asm/processor.h | 1 + arch/unicore32/include/asm/processor.h | 1 + arch/x86/include/asm/processor.h | 1 + arch/x86/um/asm/processor.h | 1 + arch/xtensa/include/asm/processor.h | 1 + 33 files changed, 36 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 43a7559c448b..0556fda7bc6d 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -58,6 +58,7 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define ARCH_HAS_PREFETCH diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 16b630fbeb6a..6c158d576355 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -60,6 +60,7 @@ struct task_struct; #ifndef CONFIG_EZNPS_MTM_EXT #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #else @@ -67,6 +68,7 @@ struct task_struct; #define cpu_relax() \ __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() barrier() #endif diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 8a1e8e995dae..db660e0b4bd3 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,6 +82,7 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 60e34824e18c..3f9b0e54dae3 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -149,6 +149,7 @@ static inline void cpu_relax(void) asm volatile("yield" ::: "memory"); } +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Thread switching */ diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index 941593c7d9f3..e412e8b68a7d 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,6 +92,7 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 0c265aba94ad..8b8704a44b77 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -92,6 +92,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Get the Silicon Revision of the chip */ diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index f2ef31be2f8b..914d7308bdb4 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,6 +121,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 862126b58116..01dd52ecac84 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,6 +63,7 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index 73f0a79ad8e6..4d00d65f5e5e 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -107,6 +107,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.frame0->sp) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* data cache prefetch */ diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 111df7397ac7..683a061c3cb5 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -127,6 +127,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define HARD_RESET_NOW() ({ \ diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index d8501137c8d0..1558ddb9e5d7 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,6 +56,7 @@ struct thread_struct { } #define cpu_relax() __vmyield() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index ce53c50d0ba4..4654b71dc8db 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -547,6 +547,7 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() static inline int diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 9f8fd9bef70f..b2620370320d 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,6 +133,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index c84a2183b3f0..13e07ae6786d 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -156,6 +156,7 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #endif diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index a0333ebcac35..61d6e2722893 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -152,6 +152,7 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index c38d0dd91134..fd7dd11c730a 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,6 +22,7 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() +# define cpu_relax_yield() cpu_relax() # define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(tsk) \ diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 0d36c87acbe2..9a656f6afc7c 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -389,6 +389,7 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index b10ba121c849..89f63d1720ee 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -69,6 +69,7 @@ extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 1c953f0cadbf..303e5932a733 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -88,6 +88,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.kregs->sp) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 70334c9f7d24..6ecfc2ab28e4 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -92,6 +92,7 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 2e674e13e005..ea2ff9febffd 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -309,6 +309,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c07c31b0e89e..908fa7cb3fb3 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,6 +404,7 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Check that a certain kernel stack pointer is valid in task_struct p */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 602af692efdc..5bb44333d320 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -234,8 +234,9 @@ static inline unsigned short stap(void) /* * Give up the time slice of the virtual PU. */ -void cpu_relax(void); +void cpu_relax_yield(void); +#define cpu_relax() cpu_relax_yield() #define cpu_relax_lowlatency() barrier() #define ECAG_CACHE_ATTRIBUTE 0 diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 81d0808085e6..9e60ef144d03 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -53,7 +53,7 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax(void) +void notrace cpu_relax_yield(void) { if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { diag_stat_inc(DIAG_STAT_X044); @@ -61,7 +61,7 @@ void notrace cpu_relax(void) } barrier(); } -EXPORT_SYMBOL(cpu_relax); +EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index 851f441991d2..e8e87b4d7971 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,6 +24,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define release_thread(thread) do {} while (0) diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index f9a09942a32d..099a99105e1c 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,6 +97,7 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 812fd08f3e62..50e908a3c651 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,6 +119,7 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() extern void (*sparc_idle)(void); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index ce2595c89471..3e8fac724f18 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,6 +216,7 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 0684e88aacd8..91a39a50520d 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -264,6 +264,7 @@ static inline void cpu_relax(void) barrier(); } +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Info on this processor (see fs/proc/cpuinfo.c) */ diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index 8d21b7adf26b..fc54d5d50419 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,6 +71,7 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 984a7bf17f6a..44adadab44d6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -588,6 +588,7 @@ static __always_inline void cpu_relax(void) rep_nop(); } +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Stop speculative execution and prefetching of modified code. */ diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 233ee09c1ce8..01597afa8888 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -26,6 +26,7 @@ static inline void rep_nop(void) } #define cpu_relax() rep_nop() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index b42d68bfe3cf..fe14dc2b394a 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -206,6 +206,7 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() +#define cpu_relax_yield() cpu_relax() #define cpu_relax_lowlatency() cpu_relax() /* Special register access. */ -- cgit v1.2.3 From 22b6430d36659b37ed139b7fd87fcc7237fb0cfd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Oct 2016 11:03:13 +0200 Subject: locking/core, s390: Make cpu_relax() a barrier again stop_machine() seemed to be the only important place for yielding during cpu_relax(). This was fixed by using cpu_relax_yield(). Therefore, we can now redefine cpu_relax() to be a barrier instead on s390, making s390 identical to all other architectures. Signed-off-by: Christian Borntraeger Signed-off-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Nicholas Piggin Cc: Noam Camus Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linuxppc-dev@lists.ozlabs.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1477386195-32736-4-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- arch/s390/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5bb44333d320..79343e37b455 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -236,7 +236,7 @@ static inline unsigned short stap(void) */ void cpu_relax_yield(void); -#define cpu_relax() cpu_relax_yield() +#define cpu_relax() barrier() #define cpu_relax_lowlatency() barrier() #define ECAG_CACHE_ATTRIBUTE 0 -- cgit v1.2.3 From 5bd0b85ba8bb9de6f61f33f3752fc85f4c87fc22 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Oct 2016 11:03:15 +0200 Subject: locking/core, arch: Remove cpu_relax_lowlatency() As there are no users left, we can remove cpu_relax_lowlatency() implementations from every architecture. Signed-off-by: Christian Borntraeger Signed-off-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Nicholas Piggin Cc: Noam Camus Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Cc: linuxppc-dev@lists.ozlabs.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Cc: Link: http://lkml.kernel.org/r/1477386195-32736-6-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/processor.h | 1 - arch/arc/include/asm/processor.h | 2 -- arch/arm/include/asm/processor.h | 1 - arch/arm64/include/asm/processor.h | 1 - arch/avr32/include/asm/processor.h | 1 - arch/blackfin/include/asm/processor.h | 1 - arch/c6x/include/asm/processor.h | 1 - arch/cris/include/asm/processor.h | 1 - arch/frv/include/asm/processor.h | 1 - arch/h8300/include/asm/processor.h | 1 - arch/hexagon/include/asm/processor.h | 1 - arch/ia64/include/asm/processor.h | 1 - arch/m32r/include/asm/processor.h | 1 - arch/m68k/include/asm/processor.h | 1 - arch/metag/include/asm/processor.h | 1 - arch/microblaze/include/asm/processor.h | 1 - arch/mips/include/asm/processor.h | 1 - arch/mn10300/include/asm/processor.h | 1 - arch/nios2/include/asm/processor.h | 1 - arch/openrisc/include/asm/processor.h | 1 - arch/parisc/include/asm/processor.h | 1 - arch/powerpc/include/asm/processor.h | 1 - arch/s390/include/asm/processor.h | 1 - arch/score/include/asm/processor.h | 1 - arch/sh/include/asm/processor.h | 1 - arch/sparc/include/asm/processor_32.h | 1 - arch/sparc/include/asm/processor_64.h | 1 - arch/tile/include/asm/processor.h | 1 - arch/unicore32/include/asm/processor.h | 1 - arch/x86/include/asm/processor.h | 1 - arch/x86/um/asm/processor.h | 1 - arch/xtensa/include/asm/processor.h | 1 - 32 files changed, 33 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 0556fda7bc6d..31e8dbeef10b 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -59,7 +59,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 6c158d576355..d102a49ad8c5 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -61,7 +61,6 @@ struct task_struct; #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #else @@ -69,7 +68,6 @@ struct task_struct; __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() barrier() #endif diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index db660e0b4bd3..9e71c58bfa6f 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -83,7 +83,6 @@ unsigned long get_wchan(struct task_struct *p); #endif #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 3f9b0e54dae3..6132f64af68d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -150,7 +150,6 @@ static inline void cpu_relax(void) } #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index e412e8b68a7d..ee62365ad0da 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -93,7 +93,6 @@ extern struct avr32_cpuinfo boot_cpu_data; #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 8b8704a44b77..57acfb1acfe5 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -93,7 +93,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() smp_mb() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index 914d7308bdb4..1fd22e727e44 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -122,7 +122,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() do { } while (0) #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 01dd52ecac84..1a578417397f 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -64,7 +64,6 @@ static inline void release_thread(struct task_struct *dead_task) #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index 4d00d65f5e5e..c1e5f2a0ca31 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -108,7 +108,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* data cache prefetch */ #define ARCH_HAS_PREFETCH diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 683a061c3cb5..42d605369217 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -128,7 +128,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define HARD_RESET_NOW() ({ \ local_irq_disable(); \ diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 1558ddb9e5d7..5d694cccc85a 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -57,7 +57,6 @@ struct thread_struct { #define cpu_relax() __vmyield() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 4654b71dc8db..0c2c3b256f6c 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -548,7 +548,6 @@ ia64_eoi (void) #define cpu_relax() ia64_hint(ia64_hint_pause) #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index b2620370320d..9b83a13290fc 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -134,6 +134,5 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 13e07ae6786d..b0d044224ce5 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -157,6 +157,5 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #endif diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index 61d6e2722893..ee302a637f24 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -153,7 +153,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index fd7dd11c730a..08ec1f725b7f 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -23,7 +23,6 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() # define cpu_relax_yield() cpu_relax() -# define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 9a656f6afc7c..8ea95e77ec9d 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -390,7 +390,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 89f63d1720ee..d11397bc9429 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -70,7 +70,6 @@ extern void dodgy_tsc(void); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* * User space process size: 1.75GB (default). diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 303e5932a733..d32c17669123 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -89,7 +89,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 6ecfc2ab28e4..7f47fc7b7eae 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -93,7 +93,6 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index ea2ff9febffd..a4a07f4f7c20 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -310,7 +310,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* * parisc_requires_coherency() is used to identify the combined VIPT/PIPT diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 908fa7cb3fb3..5684e6872473 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -405,7 +405,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #endif #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 79343e37b455..9e32f25bdea3 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -237,7 +237,6 @@ static inline unsigned short stap(void) void cpu_relax_yield(void); #define cpu_relax() barrier() -#define cpu_relax_lowlatency() barrier() #define ECAG_CACHE_ATTRIBUTE 0 #define ECAG_CPU_ATTRIBUTE 1 diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index e8e87b4d7971..a1e97c063ed4 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -25,7 +25,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 099a99105e1c..9454ff1ad0d9 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -98,7 +98,6 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index 50e908a3c651..fc32b7311481 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -120,7 +120,6 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() extern void (*sparc_idle)(void); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 3e8fac724f18..12787dfeb11c 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -217,7 +217,6 @@ unsigned long get_wchan(struct task_struct *task); ".previous" \ ::: "memory") #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 91a39a50520d..c1c228b16f7f 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -265,7 +265,6 @@ static inline void cpu_relax(void) } #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index fc54d5d50419..eeefe7c529eb 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -72,7 +72,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 44adadab44d6..7513c996f673 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -589,7 +589,6 @@ static __always_inline void cpu_relax(void) } #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Stop speculative execution and prefetching of modified code. */ static inline void sync_core(void) diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index 01597afa8888..b4bd63b22b7f 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -27,7 +27,6 @@ static inline void rep_nop(void) #define cpu_relax() rep_nop() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index fe14dc2b394a..7d8d6bececfc 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -207,7 +207,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #define cpu_relax_yield() cpu_relax() -#define cpu_relax_lowlatency() cpu_relax() /* Special register access. */ -- cgit v1.2.3 From 6d0d287891a022ebba572327cbd70b5de69a63a2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 16 Nov 2016 13:23:05 +0100 Subject: locking/core: Provide common cpu_relax_yield() definition No need to duplicate the same define everywhere. Since the only user is stop-machine and the only provider is s390, we can use a default implementation of cpu_relax_yield() in sched.h. Suggested-by: Russell King Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand Acked-by: Russell King Cc: Andrew Morton Cc: Catalin Marinas Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Nicholas Piggin Cc: Noam Camus Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-s390 Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1479298985-191589-1-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/processor.h | 1 - arch/arc/include/asm/processor.h | 3 --- arch/arm/include/asm/processor.h | 2 -- arch/arm64/include/asm/processor.h | 2 -- arch/avr32/include/asm/processor.h | 1 - arch/blackfin/include/asm/processor.h | 1 - arch/c6x/include/asm/processor.h | 1 - arch/cris/include/asm/processor.h | 1 - arch/frv/include/asm/processor.h | 1 - arch/h8300/include/asm/processor.h | 1 - arch/hexagon/include/asm/processor.h | 1 - arch/ia64/include/asm/processor.h | 1 - arch/m32r/include/asm/processor.h | 1 - arch/m68k/include/asm/processor.h | 1 - arch/metag/include/asm/processor.h | 1 - arch/microblaze/include/asm/processor.h | 1 - arch/mips/include/asm/processor.h | 1 - arch/mn10300/include/asm/processor.h | 1 - arch/nios2/include/asm/processor.h | 1 - arch/openrisc/include/asm/processor.h | 1 - arch/parisc/include/asm/processor.h | 1 - arch/powerpc/include/asm/processor.h | 2 -- arch/s390/include/asm/processor.h | 1 + arch/score/include/asm/processor.h | 1 - arch/sh/include/asm/processor.h | 1 - arch/sparc/include/asm/processor_32.h | 1 - arch/sparc/include/asm/processor_64.h | 1 - arch/tile/include/asm/processor.h | 2 -- arch/unicore32/include/asm/processor.h | 1 - arch/x86/include/asm/processor.h | 2 -- arch/x86/um/asm/processor.h | 1 - arch/xtensa/include/asm/processor.h | 1 - include/linux/sched.h | 4 ++++ 33 files changed, 5 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 31e8dbeef10b..2fec2dee3020 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -58,7 +58,6 @@ unsigned long get_wchan(struct task_struct *p); ((tsk) == current ? rdusp() : task_thread_info(tsk)->pcb.usp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d102a49ad8c5..6e1242da0159 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -60,15 +60,12 @@ struct task_struct; #ifndef CONFIG_EZNPS_MTM_EXT #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #else #define cpu_relax() \ __asm__ __volatile__ (".word %0" : : "i"(CTOP_INST_SCHD_RW) : "memory") -#define cpu_relax_yield() cpu_relax() - #endif #define copy_segments(tsk, mm) do { } while (0) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 9e71c58bfa6f..c3d5fc124a05 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -82,8 +82,6 @@ unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() #endif -#define cpu_relax_yield() cpu_relax() - #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6132f64af68d..747c65a616ed 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -149,8 +149,6 @@ static inline void cpu_relax(void) asm volatile("yield" ::: "memory"); } -#define cpu_relax_yield() cpu_relax() - /* Thread switching */ extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index ee62365ad0da..972adcc1e8f4 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -92,7 +92,6 @@ extern struct avr32_cpuinfo boot_cpu_data; #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3)) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define cpu_sync_pipeline() asm volatile("sub pc, -2" : : : "memory") struct cpu_context { diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 57acfb1acfe5..85d4af97c986 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -92,7 +92,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() smp_mb() -#define cpu_relax_yield() cpu_relax() /* Get the Silicon Revision of the chip */ static inline uint32_t __pure bfin_revid(void) diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index 1fd22e727e44..b9eb3da7f278 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -121,7 +121,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(task) (task_pt_regs(task)->sp) #define cpu_relax() do { } while (0) -#define cpu_relax_yield() cpu_relax() extern const struct seq_operations cpuinfo_op; diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 1a578417397f..15b815df29c1 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -63,7 +63,6 @@ static inline void release_thread(struct task_struct *dead_task) #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() void default_idle(void); diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index c1e5f2a0ca31..ddaeb9cc9143 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -107,7 +107,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.frame0->sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* data cache prefetch */ #define ARCH_HAS_PREFETCH diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 42d605369217..65132d7ae9e5 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -127,7 +127,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define HARD_RESET_NOW() ({ \ local_irq_disable(); \ diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 5d694cccc85a..45a825402f63 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -56,7 +56,6 @@ struct thread_struct { } #define cpu_relax() __vmyield() -#define cpu_relax_yield() cpu_relax() /* * Decides where the kernel will search for a free chunk of vm space during diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 0c2c3b256f6c..03911a336406 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -547,7 +547,6 @@ ia64_eoi (void) } #define cpu_relax() ia64_hint(ia64_hint_pause) -#define cpu_relax_yield() cpu_relax() static inline int ia64_get_irr(unsigned int vector) diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 9b83a13290fc..5767367550c6 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -133,6 +133,5 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* _ASM_M32R_PROCESSOR_H */ diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index b0d044224ce5..f5f790c31bf8 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -156,6 +156,5 @@ unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) ((tsk)->thread.esp0)) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index ee302a637f24..ec6a49076980 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -152,7 +152,6 @@ unsigned long get_wchan(struct task_struct *p); #define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() extern void setup_priv(void); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 08ec1f725b7f..37ef196e4519 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -22,7 +22,6 @@ extern const struct seq_operations cpuinfo_op; # define cpu_relax() barrier() -# define cpu_relax_yield() cpu_relax() #define task_pt_regs(tsk) \ (((struct pt_regs *)(THREAD_SIZE + task_stack_page(tsk))) - 1) diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 8ea95e77ec9d..95b8c471f572 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -389,7 +389,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_STATUS(tsk) (task_pt_regs(tsk)->cp0_status) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * Return_address is a replacement for __builtin_return_address(count) diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index d11397bc9429..18e17abf7664 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -69,7 +69,6 @@ extern void print_cpu_info(struct mn10300_cpuinfo *); extern void dodgy_tsc(void); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * User space process size: 1.75GB (default). diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index d32c17669123..3bbbc3d798e5 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -88,7 +88,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.kregs->sp) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* __ASSEMBLY__ */ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index 7f47fc7b7eae..a908e6c30a00 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -92,7 +92,6 @@ extern unsigned long thread_saved_pc(struct task_struct *t); #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #endif /* __ASSEMBLY__ */ #endif /* __ASM_OPENRISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index a4a07f4f7c20..ca40741378be 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -309,7 +309,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* * parisc_requires_coherency() is used to identify the combined VIPT/PIPT diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 5684e6872473..dac83fcb9445 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define cpu_relax() barrier() #endif -#define cpu_relax_yield() cpu_relax() - /* Check that a certain kernel stack pointer is valid in task_struct p */ int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 9e32f25bdea3..9d3a21aedc97 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -234,6 +234,7 @@ static inline unsigned short stap(void) /* * Give up the time slice of the virtual PU. */ +#define cpu_relax_yield cpu_relax_yield void cpu_relax_yield(void); #define cpu_relax() barrier() diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index a1e97c063ed4..d9a922d8711b 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -24,7 +24,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define current_text_addr() ({ __label__ _l; _l: &&_l; }) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define release_thread(thread) do {} while (0) /* diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 9454ff1ad0d9..5addd69f70ef 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -97,7 +97,6 @@ extern struct sh_cpuinfo cpu_data[]; #define cpu_sleep() __asm__ __volatile__ ("sleep" : : : "memory") #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() void default_idle(void); void stop_this_cpu(void *); diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index fc32b7311481..365d4cb267b4 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -119,7 +119,6 @@ extern struct task_struct *last_task_used_math; int do_mathemu(struct pt_regs *regs, struct task_struct *fpt); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() extern void (*sparc_idle)(void); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 12787dfeb11c..6448cfc8292f 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -216,7 +216,6 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") -#define cpu_relax_yield() cpu_relax() /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index c1c228b16f7f..0bc9968b97a1 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -264,8 +264,6 @@ static inline void cpu_relax(void) barrier(); } -#define cpu_relax_yield() cpu_relax() - /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; diff --git a/arch/unicore32/include/asm/processor.h b/arch/unicore32/include/asm/processor.h index eeefe7c529eb..4eaa42167667 100644 --- a/arch/unicore32/include/asm/processor.h +++ b/arch/unicore32/include/asm/processor.h @@ -71,7 +71,6 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7513c996f673..c84605bb2a15 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -588,8 +588,6 @@ static __always_inline void cpu_relax(void) rep_nop(); } -#define cpu_relax_yield() cpu_relax() - /* Stop speculative execution and prefetching of modified code. */ static inline void sync_core(void) { diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h index b4bd63b22b7f..c77db2288982 100644 --- a/arch/x86/um/asm/processor.h +++ b/arch/x86/um/asm/processor.h @@ -26,7 +26,6 @@ static inline void rep_nop(void) } #define cpu_relax() rep_nop() -#define cpu_relax_yield() cpu_relax() #define task_pt_regs(t) (&(t)->thread.regs) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 7d8d6bececfc..86ffcd68e496 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -206,7 +206,6 @@ extern unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) (task_pt_regs(tsk)->areg[1]) #define cpu_relax() barrier() -#define cpu_relax_yield() cpu_relax() /* Special register access. */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 348f51b0ec92..c1aa3b02f6ac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2444,6 +2444,10 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + /* * Do not use outside of architecture code which knows its limitations. * -- cgit v1.2.3 From 41946c86876ea6a3e8857182356e6d76dbfe7fb6 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:31 -0400 Subject: locking/core, powerpc: Implement vcpu_is_preempted(cpu) Optimize spinlock and mutex busy-loops by providing a vcpu_is_preempted(cpu) function on pSeries. We do not support PowerNV. All this can be achieved by using lppaca->yield_count, which is zero on PowerNV. Suggested-by: Boqun Feng Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-5-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/spinlock.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index fa37fe93bc02..8c1b913de6d7 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -52,6 +52,14 @@ #define SYNC_IO #endif +#ifdef CONFIG_PPC_PSERIES +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); +} +#endif + static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) { return lock.slock == 0; -- cgit v1.2.3 From 760928c0dafc7d0faf0c0248e28e16d4c8dc7ad6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 2 Nov 2016 05:08:32 -0400 Subject: locking/spinlocks, s390: Implement vcpu_is_preempted(cpu) This implements the s390 version for vcpu_is_preempted(cpu), by reworking the existing smp_vcpu_scheduled() function into arch_vcpu_is_preempted(). We can then also get rid of the local cpu_is_preempted() function by moving the CIF_ENABLED_WAIT test into arch_vcpu_is_preempted(). Signed-off-by: Christian Borntraeger Signed-off-by: Peter Zijlstra (Intel) Acked-by: Heiko Carstens Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-6-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/s390/include/asm/spinlock.h | 8 ++++++++ arch/s390/kernel/smp.c | 9 +++++++-- arch/s390/lib/spinlock.c | 25 ++++++++----------------- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 7e9e09f600fa..7ecd8902a5c3 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -23,6 +23,14 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) return __sync_bool_compare_and_swap(lock, old, new); } +#ifndef CONFIG_SMP +static inline bool arch_vcpu_is_preempted(int cpu) { return false; } +#else +bool arch_vcpu_is_preempted(int cpu); +#endif + +#define vcpu_is_preempted arch_vcpu_is_preempted + /* * Simple spin lock operations. There are two variants, one clears IRQ's * on the local processor, one does not. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35531fe1c5ea..b988ed1d75ad 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -368,10 +368,15 @@ int smp_find_processor_id(u16 address) return -1; } -int smp_vcpu_scheduled(int cpu) +bool arch_vcpu_is_preempted(int cpu) { - return pcpu_running(pcpu_devices + cpu); + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return false; + if (pcpu_running(pcpu_devices + cpu)) + return false; + return true; } +EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index e5f50a7d2f4e..e48a48ec24bc 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -37,15 +37,6 @@ static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); } -static inline int cpu_is_preempted(int cpu) -{ - if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) - return 0; - if (smp_vcpu_scheduled(cpu)) - return 0; - return 1; -} - void arch_spin_lock_wait(arch_spinlock_t *lp) { unsigned int cpu = SPINLOCK_LOCKVAL; @@ -62,7 +53,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) continue; } /* First iteration: check if the lock owner is running. */ - if (first_diag && cpu_is_preempted(~owner)) { + if (first_diag && arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; continue; @@ -81,7 +72,7 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; } @@ -108,7 +99,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) continue; } /* Check if the lock owner is running. */ - if (first_diag && cpu_is_preempted(~owner)) { + if (first_diag && arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; continue; @@ -127,7 +118,7 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) * yield the CPU unconditionally. For LPAR rely on the * sense running status. */ - if (!MACHINE_IS_LPAR || cpu_is_preempted(~owner)) { + if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(~owner)) { smp_yield_cpu(~owner); first_diag = 0; } @@ -165,7 +156,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -211,7 +202,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -241,7 +232,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) owner = 0; while (1) { if (count-- <= 0) { - if (owner && cpu_is_preempted(~owner)) + if (owner && arch_vcpu_is_preempted(~owner)) smp_yield_cpu(~owner); count = spin_retry; } @@ -285,7 +276,7 @@ void arch_lock_relax(unsigned int cpu) { if (!cpu) return; - if (MACHINE_IS_LPAR && !cpu_is_preempted(~cpu)) + if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(~cpu)) return; smp_yield_cpu(~cpu); } -- cgit v1.2.3 From 446f3dc8cc0af59259c6c8b898726fae7ed2c055 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:33 -0400 Subject: locking/core, x86/paravirt: Implement vcpu_is_preempted(cpu) for KVM and Xen guests Optimize spinlock and mutex busy-loops by providing a vcpu_is_preempted(cpu) function on KVM and Xen platforms. Extend the pv_lock_ops interface accordingly and implement the callbacks on KVM and Xen. Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) [ Translated to English. ] Acked-by: Paolo Bonzini Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-7-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt_types.h | 2 ++ arch/x86/include/asm/spinlock.h | 8 ++++++++ arch/x86/kernel/paravirt-spinlocks.c | 6 ++++++ 3 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0f400c0e4979..38c3bb74740f 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -310,6 +310,8 @@ struct pv_lock_ops { void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); + + bool (*vcpu_is_preempted)(int cpu); }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 921bea7a2708..0526f596e399 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -26,6 +26,14 @@ extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); +#ifdef CONFIG_PARAVIRT_SPINLOCKS +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_lock_ops.vcpu_is_preempted(cpu); +} +#endif + #include /* diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 2c55a003b793..2f204dd552a4 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -21,12 +21,18 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } +static bool native_vcpu_is_preempted(int cpu) +{ + return 0; +} + struct pv_lock_ops pv_lock_ops = { #ifdef CONFIG_SMP .queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, + .vcpu_is_preempted = native_vcpu_is_preempted, #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); -- cgit v1.2.3 From 0b9f6c4615c993d2b552e0d2bd1ade49b56e5beb Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:35 -0400 Subject: x86/kvm: Support the vCPU preemption check Support the vcpu_is_preempted() functionality under KVM. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding. Use struct kvm_steal_time::preempted to indicate that if a vCPU is running or not. Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-9-git-send-email-xinhui.pan@linux.vnet.ibm.com [ Typo fixes. ] Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/kvm_para.h | 4 +++- arch/x86/kvm/x86.c | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 94dc8ca434e0..1421a6585126 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -45,7 +45,9 @@ struct kvm_steal_time { __u64 steal; __u32 version; __u32 flags; - __u32 pad[12]; + __u8 preempted; + __u8 u8_pad[3]; + __u32 pad[11]; }; #define KVM_STEAL_ALIGNMENT_BITS 5 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 04c5d96b1d67..59c2d6f1b131 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2071,6 +2071,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu) &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; + vcpu->arch.st.steal.preempted = 0; + if (vcpu->arch.st.steal.version & 1) vcpu->arch.st.steal.version += 1; /* first time write, random junk */ @@ -2826,8 +2828,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); } +static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) + return; + + vcpu->arch.st.steal.preempted = 1; + + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, + &vcpu->arch.st.steal.preempted, + offsetof(struct kvm_steal_time, preempted), + sizeof(vcpu->arch.st.steal.preempted)); +} + void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_steal_time_set_preempted(vcpu); kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); vcpu->arch.last_host_tsc = rdtsc(); -- cgit v1.2.3 From 1885aa7041c9e801e5d5b093b9dad38937ca37f6 Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 2 Nov 2016 05:08:36 -0400 Subject: x86/kvm: Support the vCPU preemption check Support the vcpu_is_preempted() functionality under KVM. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding. struct kvm_steal_time::preempted indicates that if one vCPU is running or not after commit "x86, kvm/x86.c: support vCPU preempted check". unix benchmark result: host: kernel 4.8.1, i5-4570, 4 cpus guest: kernel 4.8.1, 8 vcpus test-case after-patch before-patch Execl Throughput | 18307.9 lps | 11701.6 lps File Copy 1024 bufsize 2000 maxblocks | 1352407.3 KBps | 790418.9 KBps File Copy 256 bufsize 500 maxblocks | 367555.6 KBps | 222867.7 KBps File Copy 4096 bufsize 8000 maxblocks | 3675649.7 KBps | 1780614.4 KBps Pipe Throughput | 11872208.7 lps | 11855628.9 lps Pipe-based Context Switching | 1495126.5 lps | 1490533.9 lps Process Creation | 29881.2 lps | 28572.8 lps Shell Scripts (1 concurrent) | 23224.3 lpm | 22607.4 lpm Shell Scripts (8 concurrent) | 3531.4 lpm | 3211.9 lpm System Call Overhead | 10385653.0 lps | 10419979.0 lps Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-10-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index edbbfc854e39..0b48dd2c3554 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -415,6 +415,15 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } +static bool kvm_vcpu_is_preempted(int cpu) +{ + struct kvm_steal_time *src; + + src = &per_cpu(steal_time, cpu); + + return !!src->preempted; +} + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { @@ -471,6 +480,9 @@ void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; pv_time_ops.steal_clock = kvm_steal_clock; +#ifdef CONFIG_PARAVIRT_SPINLOCKS + pv_lock_ops.vcpu_is_preempted = kvm_vcpu_is_preempted; +#endif } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) -- cgit v1.2.3 From de7689cf8f3820b34088da3b22ce1a548dda2fc5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 2 Nov 2016 05:08:37 -0400 Subject: x86/xen: Support the vCPU preemption check Support the vcpu_is_preempted() functionality under Xen. This will enhance lock performance on overcommitted hosts (more runnable vCPUs than physical CPUs in the system) as doing busy waits for preempted vCPUs will hurt system performance far worse than early yielding. A quick test (4 vCPUs on 1 physical CPU doing a parallel build job with "make -j 8") reduced system time by about 5% with this patch. Signed-off-by: Juergen Gross Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: xen-devel-request@lists.xenproject.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1478077718-37424-11-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/xen/spinlock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 3d6e0064cbfc..74756bbd28c4 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -114,7 +114,6 @@ void xen_uninit_lock_cpu(int cpu) per_cpu(irq_name, cpu) = NULL; } - /* * Our init of PV spinlocks is split in two init functions due to us * using paravirt patching and jump labels patching and having to do @@ -137,6 +136,8 @@ void __init xen_init_spinlocks(void) pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = xen_qlock_wait; pv_lock_ops.kick = xen_qlock_kick; + + pv_lock_ops.vcpu_is_preempted = xen_vcpu_stolen; } /* -- cgit v1.2.3 From 3cded41794818d788aa1dc028ede4a1c1222d937 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Nov 2016 16:47:06 +0100 Subject: x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted() Avoid the pointless function call to pv_lock_ops.vcpu_is_preempted() when a paravirt spinlock enabled kernel is ran on native hardware. Do this by patching out the CALL instruction with "XOR %RAX,%RAX" which has the same effect (0 return value). Signed-off-by: Peter Zijlstra (Intel) Cc: David.Laight@ACULAB.COM Cc: Linus Torvalds Cc: Pan Xinhui Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: boqun.feng@gmail.com Cc: borntraeger@de.ibm.com Cc: bsingharora@gmail.com Cc: dave@stgolabs.net Cc: jgross@suse.com Cc: kernellwp@gmail.com Cc: konrad.wilk@oracle.com Cc: mpe@ellerman.id.au Cc: paulmck@linux.vnet.ibm.com Cc: paulus@samba.org Cc: pbonzini@redhat.com Cc: rkrcmar@redhat.com Cc: will.deacon@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 5 +++++ arch/x86/include/asm/paravirt_types.h | 2 +- arch/x86/include/asm/qspinlock.h | 6 ++++++ arch/x86/include/asm/spinlock.h | 8 -------- arch/x86/kernel/kvm.c | 25 +++++++++++++------------ arch/x86/kernel/paravirt-spinlocks.c | 14 ++++++++++---- arch/x86/kernel/paravirt_patch_32.c | 8 ++++++++ arch/x86/kernel/paravirt_patch_64.c | 8 ++++++++ arch/x86/xen/spinlock.c | 5 +++-- 9 files changed, 54 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index ce932812f142..6108b1fada2b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -678,6 +678,11 @@ static __always_inline void pv_kick(int cpu) PVOP_VCALL1(pv_lock_ops.kick, cpu); } +static __always_inline bool pv_vcpu_is_preempted(int cpu) +{ + return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); +} + #endif /* SMP && PARAVIRT_SPINLOCKS */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 38c3bb74740f..2614bd7a7839 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -311,7 +311,7 @@ struct pv_lock_ops { void (*wait)(u8 *ptr, u8 val); void (*kick)(int cpu); - bool (*vcpu_is_preempted)(int cpu); + struct paravirt_callee_save vcpu_is_preempted; }; /* This contains all the paravirt structures: we get a convenient diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index eaba08076030..c343ab52579f 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -32,6 +32,12 @@ static inline void queued_spin_unlock(struct qspinlock *lock) { pv_queued_spin_unlock(lock); } + +#define vcpu_is_preempted vcpu_is_preempted +static inline bool vcpu_is_preempted(int cpu) +{ + return pv_vcpu_is_preempted(cpu); +} #else static inline void queued_spin_unlock(struct qspinlock *lock) { diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 0526f596e399..921bea7a2708 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -26,14 +26,6 @@ extern struct static_key paravirt_ticketlocks_enabled; static __always_inline bool static_key_false(struct static_key *key); -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(int cpu) -{ - return pv_lock_ops.vcpu_is_preempted(cpu); -} -#endif - #include /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 0b48dd2c3554..52e90d6054fb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -415,15 +415,6 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static bool kvm_vcpu_is_preempted(int cpu) -{ - struct kvm_steal_time *src; - - src = &per_cpu(steal_time, cpu); - - return !!src->preempted; -} - #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { @@ -480,9 +471,6 @@ void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; pv_time_ops.steal_clock = kvm_steal_clock; -#ifdef CONFIG_PARAVIRT_SPINLOCKS - pv_lock_ops.vcpu_is_preempted = kvm_vcpu_is_preempted; -#endif } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -604,6 +592,14 @@ out: local_irq_restore(flags); } +__visible bool __kvm_vcpu_is_preempted(int cpu) +{ + struct kvm_steal_time *src = &per_cpu(steal_time, cpu); + + return !!src->preempted; +} +PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); + /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ @@ -620,6 +616,11 @@ void __init kvm_spinlock_init(void) pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = kvm_wait; pv_lock_ops.kick = kvm_kick_cpu; + + if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + pv_lock_ops.vcpu_is_preempted = + PV_CALLEE_SAVE(__kvm_vcpu_is_preempted); + } } static __init int kvm_spinlock_init_jump(void) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 2f204dd552a4..6d4bf812af45 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -12,7 +12,6 @@ __visible void __native_queued_spin_unlock(struct qspinlock *lock) { native_queued_spin_unlock(lock); } - PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock); bool pv_is_native_spin_unlock(void) @@ -21,9 +20,16 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } -static bool native_vcpu_is_preempted(int cpu) +__visible bool __native_vcpu_is_preempted(int cpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted); + +bool pv_is_native_vcpu_is_preempted(void) { - return 0; + return pv_lock_ops.vcpu_is_preempted.func == + __raw_callee_save___native_vcpu_is_preempted; } struct pv_lock_ops pv_lock_ops = { @@ -32,7 +38,7 @@ struct pv_lock_ops pv_lock_ops = { .queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .wait = paravirt_nop, .kick = paravirt_nop, - .vcpu_is_preempted = native_vcpu_is_preempted, + .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 920c6ae08592..ff03dbd28625 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -12,6 +12,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)"); +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -27,6 +28,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); +extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -56,6 +58,12 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) { + start = start_pv_lock_ops_vcpu_is_preempted; + end = end_pv_lock_ops_vcpu_is_preempted; + goto patch_site; + } #endif default: diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index bb3840cedb4f..e61dd9791f4f 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -21,6 +21,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax"); #if defined(CONFIG_PARAVIRT_SPINLOCKS) DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)"); +DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax"); #endif unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) @@ -36,6 +37,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) } extern bool pv_is_native_spin_unlock(void); +extern bool pv_is_native_vcpu_is_preempted(void); unsigned native_patch(u8 type, u16 clobbers, void *ibuf, unsigned long addr, unsigned len) @@ -68,6 +70,12 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) { + start = start_pv_lock_ops_vcpu_is_preempted; + end = end_pv_lock_ops_vcpu_is_preempted; + goto patch_site; + } #endif default: diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 74756bbd28c4..e8a9ea7d7a21 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -114,6 +114,8 @@ void xen_uninit_lock_cpu(int cpu) per_cpu(irq_name, cpu) = NULL; } +PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen); + /* * Our init of PV spinlocks is split in two init functions due to us * using paravirt patching and jump labels patching and having to do @@ -136,8 +138,7 @@ void __init xen_init_spinlocks(void) pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); pv_lock_ops.wait = xen_qlock_wait; pv_lock_ops.kick = xen_qlock_kick; - - pv_lock_ops.vcpu_is_preempted = xen_vcpu_stolen; + pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } /* -- cgit v1.2.3 From 45dbea5f55c05980cbb4c30047c71a820cd3f282 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Dec 2016 16:42:14 +0100 Subject: x86/paravirt: Fix native_patch() While chasing a regression I noticed we potentially patch the wrong code in native_patch(). If we do not select the native code sequence, we must use the default patcher, not fall-through the switch case. Signed-off-by: Peter Zijlstra (Intel) Cc: Alok Kataria Cc: Borislav Petkov Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paolo Bonzini Cc: Peter Anvin Cc: Peter Zijlstra Cc: Rusty Russell Cc: Thomas Gleixner Cc: kernel test robot Fixes: 3cded4179481 ("x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted()") Link: http://lkml.kernel.org/r/20161208154349.270616999@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/paravirt_patch_32.c | 4 ++++ arch/x86/kernel/paravirt_patch_64.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index ff03dbd28625..33cdec221f3d 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -58,15 +58,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + goto patch_default; + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): if (pv_is_native_vcpu_is_preempted()) { start = start_pv_lock_ops_vcpu_is_preempted; end = end_pv_lock_ops_vcpu_is_preempted; goto patch_site; } + goto patch_default; #endif default: +patch_default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index e61dd9791f4f..b0fceff502b3 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -70,15 +70,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, end = end_pv_lock_ops_queued_spin_unlock; goto patch_site; } + goto patch_default; + case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted): if (pv_is_native_vcpu_is_preempted()) { start = start_pv_lock_ops_vcpu_is_preempted; end = end_pv_lock_ops_vcpu_is_preempted; goto patch_site; } + goto patch_default; #endif default: +patch_default: ret = paravirt_patch_default(type, clobbers, ibuf, addr, len); break; -- cgit v1.2.3 From 11f254dbb3a2e3f0d8552d0dd37f4faa432b6b16 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Dec 2016 16:42:15 +0100 Subject: x86/paravirt: Fix bool return type for PVOP_CALL() Commit: 3cded4179481 ("x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted()") introduced a paravirt op with bool return type [*] It turns out that the PVOP_CALL*() macros miscompile when rettype is bool. Code that looked like: 83 ef 01 sub $0x1,%edi ff 15 32 a0 d8 00 callq *0xd8a032(%rip) # ffffffff81e28120 84 c0 test %al,%al ended up looking like so after PVOP_CALL1() was applied: 83 ef 01 sub $0x1,%edi 48 63 ff movslq %edi,%rdi ff 14 25 20 81 e2 81 callq *0xffffffff81e28120 48 85 c0 test %rax,%rax Note how it tests the whole of %rax, even though a typical bool return function only sets %al, like: 0f 95 c0 setne %al c3 retq This is because ____PVOP_CALL() does: __ret = (rettype)__eax; and while regular integer type casts truncate the result, a cast to bool tests for any !0 value. Fix this by explicitly truncating to sizeof(rettype) before casting. [*] The actual bug should've been exposed in commit: 446f3dc8cc0a ("locking/core, x86/paravirt: Implement vcpu_is_preempted(cpu) for KVM and Xen guests") but that didn't properly implement the paravirt call. Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Cc: Alok Kataria Cc: Borislav Petkov Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paolo Bonzini Cc: Peter Anvin Cc: Peter Zijlstra Cc: Rusty Russell Cc: Thomas Gleixner Fixes: 3cded4179481 ("x86/paravirt: Optimize native pv_lock_ops.vcpu_is_preempted()") Link: http://lkml.kernel.org/r/20161208154349.346057680@infradead.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt_types.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2614bd7a7839..3f2bc0f0d3e8 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -510,6 +510,18 @@ int paravirt_disable_iospace(void); #define PVOP_TEST_NULL(op) ((void)op) #endif +#define PVOP_RETMASK(rettype) \ + ({ unsigned long __mask = ~0UL; \ + switch (sizeof(rettype)) { \ + case 1: __mask = 0xffUL; break; \ + case 2: __mask = 0xffffUL; break; \ + case 4: __mask = 0xffffffffUL; break; \ + default: break; \ + } \ + __mask; \ + }) + + #define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ pre, post, ...) \ ({ \ @@ -537,7 +549,7 @@ int paravirt_disable_iospace(void); paravirt_clobber(clbr), \ ##__VA_ARGS__ \ : "memory", "cc" extra_clbr); \ - __ret = (rettype)__eax; \ + __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \ } \ __ret; \ }) -- cgit v1.2.3