diff options
author | Ingo Molnar <mingo@kernel.org> | 2024-02-14 10:45:07 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2024-02-14 10:45:07 +0100 |
commit | 03c11eb3b16dc0058589751dfd91f254be2be613 (patch) | |
tree | e5f2889212fec0bb0babdce9abd781ab487e246a /arch/x86/lib | |
parent | de8c6a352131f642b82474abe0cbb5dd26a7e081 (diff) | |
parent | 841c35169323cd833294798e58b9bf63fa4fa1de (diff) | |
download | lwn-03c11eb3b16dc0058589751dfd91f254be2be613.tar.gz lwn-03c11eb3b16dc0058589751dfd91f254be2be613.zip |
Merge tag 'v6.8-rc4' into x86/percpu, to resolve conflicts and refresh the branch
Conflicts:
arch/x86/include/asm/percpu.h
arch/x86/include/asm/text-patching.h
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/cache-smp.c | 1 | ||||
-rw-r--r-- | arch/x86/lib/checksum_32.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/clear_page_64.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg8b_emu.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/copy_mc.c | 8 | ||||
-rw-r--r-- | arch/x86/lib/copy_page_64.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_64.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_uncached_64.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/csum-partial_64.c | 105 | ||||
-rw-r--r-- | arch/x86/lib/delay.c | 2 | ||||
-rw-r--r-- | arch/x86/lib/getuser.S | 26 | ||||
-rw-r--r-- | arch/x86/lib/hweight.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/memmove_32.S | 2 | ||||
-rw-r--r-- | arch/x86/lib/memmove_64.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/memset_64.S | 4 | ||||
-rw-r--r-- | arch/x86/lib/misc.c | 2 | ||||
-rw-r--r-- | arch/x86/lib/putuser.S | 27 | ||||
-rw-r--r-- | arch/x86/lib/retpoline.S | 188 |
19 files changed, 188 insertions, 199 deletions
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index 7c48ff4ae8d1..7af743bd3b13 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <asm/paravirt.h> #include <linux/smp.h> #include <linux/export.h> diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 23318c338db0..68f7fa3e1322 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -21,10 +21,10 @@ * converted to pure assembler */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/errno.h> #include <asm/asm.h> -#include <asm/export.h> #include <asm/nospec-branch.h> /* diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index f74a3e704a1c..2760a15fbc00 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/export.h> /* * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 8632d7dd1f00..1c96be769adc 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> #include <asm/percpu.h> #include <asm/processor-flags.h> diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c index 80efd45a7761..6e8b7e600def 100644 --- a/arch/x86/lib/copy_mc.c +++ b/arch/x86/lib/copy_mc.c @@ -70,23 +70,23 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne } EXPORT_SYMBOL_GPL(copy_mc_to_kernel); -unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len) +unsigned long __must_check copy_mc_to_user(void __user *dst, const void *src, unsigned len) { unsigned long ret; if (copy_mc_fragile_enabled) { __uaccess_begin(); - ret = copy_mc_fragile(dst, src, len); + ret = copy_mc_fragile((__force void *)dst, src, len); __uaccess_end(); return ret; } if (static_cpu_has(X86_FEATURE_ERMS)) { __uaccess_begin(); - ret = copy_mc_enhanced_fast_string(dst, src, len); + ret = copy_mc_enhanced_fast_string((__force void *)dst, src, len); __uaccess_end(); return ret; } - return copy_user_generic(dst, src, len); + return copy_user_generic((__force void *)dst, src, len); } diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 30ea644bf446..d6ae793d08fa 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -1,10 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> /* * Some CPUs run faster using the string copy instructions (sane microcode). diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 0a81aafed7f8..fc9fb5d06174 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -6,11 +6,11 @@ * Functions to copy from and to user space. */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> #include <asm/asm.h> -#include <asm/export.h> /* * rep_movs_alternative - memory copy with exception handling. diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S index 5c5f38d32672..2918e36eece2 100644 --- a/arch/x86/lib/copy_user_uncached_64.S +++ b/arch/x86/lib/copy_user_uncached_64.S @@ -3,9 +3,9 @@ * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org> */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/export.h> /* * copy_user_nocache - Uncached memory copy with exception handling diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index cea25ca8b8cf..c9dae65ac01b 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -11,26 +11,23 @@ #include <asm/checksum.h> #include <asm/word-at-a-time.h> -static inline unsigned short from32to16(unsigned a) +static inline __wsum csum_finalize_sum(u64 temp64) { - unsigned short b = a >> 16; - asm("addw %w2,%w0\n\t" - "adcw $0,%w0\n" - : "=r" (b) - : "0" (b), "r" (a)); - return b; + return (__force __wsum)((temp64 + ror64(temp64, 32)) >> 32); } -static inline __wsum csum_tail(u64 temp64, int odd) +static inline unsigned long update_csum_40b(unsigned long sum, const unsigned long m[5]) { - unsigned int result; - - result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff); - if (unlikely(odd)) { - result = from32to16(result); - result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); - } - return (__force __wsum)result; + asm("addq %1,%0\n\t" + "adcq %2,%0\n\t" + "adcq %3,%0\n\t" + "adcq %4,%0\n\t" + "adcq %5,%0\n\t" + "adcq $0,%0" + :"+r" (sum) + :"m" (m[0]), "m" (m[1]), "m" (m[2]), + "m" (m[3]), "m" (m[4])); + return sum; } /* @@ -47,64 +44,32 @@ static inline __wsum csum_tail(u64 temp64, int odd) __wsum csum_partial(const void *buff, int len, __wsum sum) { u64 temp64 = (__force u64)sum; - unsigned odd; - odd = 1 & (unsigned long) buff; - if (unlikely(odd)) { - if (unlikely(len == 0)) - return sum; - temp64 = ror32((__force u32)sum, 8); - temp64 += (*(unsigned char *)buff << 8); - len--; - buff++; + /* Do two 40-byte chunks in parallel to get better ILP */ + if (likely(len >= 80)) { + u64 temp64_2 = 0; + do { + temp64 = update_csum_40b(temp64, buff); + temp64_2 = update_csum_40b(temp64_2, buff + 40); + buff += 80; + len -= 80; + } while (len >= 80); + + asm("addq %1,%0\n\t" + "adcq $0,%0" + :"+r" (temp64): "r" (temp64_2)); } /* - * len == 40 is the hot case due to IPv6 headers, but annotating it likely() - * has noticeable negative affect on codegen for all other cases with - * minimal performance benefit here. + * len == 40 is the hot case due to IPv6 headers, so return + * early for that exact case without checking the tail bytes. */ - if (len == 40) { - asm("addq 0*8(%[src]),%[res]\n\t" - "adcq 1*8(%[src]),%[res]\n\t" - "adcq 2*8(%[src]),%[res]\n\t" - "adcq 3*8(%[src]),%[res]\n\t" - "adcq 4*8(%[src]),%[res]\n\t" - "adcq $0,%[res]" - : [res] "+r"(temp64) - : [src] "r"(buff), "m"(*(const char(*)[40])buff)); - return csum_tail(temp64, odd); - } - if (unlikely(len >= 64)) { - /* - * Extra accumulators for better ILP in the loop. - */ - u64 tmp_accum, tmp_carries; - - asm("xorl %k[tmp_accum],%k[tmp_accum]\n\t" - "xorl %k[tmp_carries],%k[tmp_carries]\n\t" - "subl $64, %[len]\n\t" - "1:\n\t" - "addq 0*8(%[src]),%[res]\n\t" - "adcq 1*8(%[src]),%[res]\n\t" - "adcq 2*8(%[src]),%[res]\n\t" - "adcq 3*8(%[src]),%[res]\n\t" - "adcl $0,%k[tmp_carries]\n\t" - "addq 4*8(%[src]),%[tmp_accum]\n\t" - "adcq 5*8(%[src]),%[tmp_accum]\n\t" - "adcq 6*8(%[src]),%[tmp_accum]\n\t" - "adcq 7*8(%[src]),%[tmp_accum]\n\t" - "adcl $0,%k[tmp_carries]\n\t" - "addq $64, %[src]\n\t" - "subl $64, %[len]\n\t" - "jge 1b\n\t" - "addq %[tmp_accum],%[res]\n\t" - "adcq %[tmp_carries],%[res]\n\t" - "adcq $0,%[res]" - : [tmp_accum] "=&r"(tmp_accum), - [tmp_carries] "=&r"(tmp_carries), [res] "+r"(temp64), - [len] "+r"(len), [src] "+r"(buff) - : "m"(*(const char *)buff)); + if (len >= 40) { + temp64 = update_csum_40b(temp64, buff); + len -= 40; + if (!len) + return csum_finalize_sum(temp64); + buff += 40; } if (len & 32) { @@ -143,7 +108,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum) : [res] "+r"(temp64) : [trail] "r"(trail)); } - return csum_tail(temp64, odd); + return csum_finalize_sum(temp64); } EXPORT_SYMBOL(csum_partial); diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 0e65d00e2339..23f81ca3f06b 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -128,7 +128,7 @@ static void delay_halt_mwaitx(u64 unused, u64 cycles) delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles); /* - * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu + * Use cpu_tss_rw as a cacheline-aligned, seldom accessed per-cpu * variable as the monitor target. */ __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 9c63713477bb..10d5ed8b5990 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -26,6 +26,7 @@ * as they get called from within inline assembly. */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/page_types.h> #include <asm/errno.h> @@ -33,7 +34,6 @@ #include <asm/thread_info.h> #include <asm/asm.h> #include <asm/smap.h> -#include <asm/export.h> #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC @@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception) #endif /* get_user */ - _ASM_EXTABLE(1b, __get_user_handle_exception) - _ASM_EXTABLE(2b, __get_user_handle_exception) - _ASM_EXTABLE(3b, __get_user_handle_exception) + _ASM_EXTABLE_UA(1b, __get_user_handle_exception) + _ASM_EXTABLE_UA(2b, __get_user_handle_exception) + _ASM_EXTABLE_UA(3b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(4b, __get_user_handle_exception) + _ASM_EXTABLE_UA(4b, __get_user_handle_exception) #else - _ASM_EXTABLE(4b, __get_user_8_handle_exception) - _ASM_EXTABLE(5b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception) #endif /* __get_user */ - _ASM_EXTABLE(6b, __get_user_handle_exception) - _ASM_EXTABLE(7b, __get_user_handle_exception) - _ASM_EXTABLE(8b, __get_user_handle_exception) + _ASM_EXTABLE_UA(6b, __get_user_handle_exception) + _ASM_EXTABLE_UA(7b, __get_user_handle_exception) + _ASM_EXTABLE_UA(8b, __get_user_handle_exception) #ifdef CONFIG_X86_64 - _ASM_EXTABLE(9b, __get_user_handle_exception) + _ASM_EXTABLE_UA(9b, __get_user_handle_exception) #else - _ASM_EXTABLE(9b, __get_user_8_handle_exception) - _ASM_EXTABLE(10b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception) + _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception) #endif diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S index 0a152e51d3f5..774bdf3e6f0a 100644 --- a/arch/x86/lib/hweight.S +++ b/arch/x86/lib/hweight.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> #include <asm/asm.h> diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 8f95fb267caa..0ae2e1712e2e 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright 2002 Andi Kleen */ +#include <linux/export.h> #include <linux/linkage.h> #include <linux/cfi_types.h> #include <asm/errno.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> .section .noinstr.text, "ax" @@ -40,7 +40,7 @@ SYM_TYPED_FUNC_START(__memcpy) SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_ALIAS(memcpy, __memcpy) +SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/arch/x86/lib/memmove_32.S b/arch/x86/lib/memmove_32.S index 0588b2c0fc95..35010ba3dd6f 100644 --- a/arch/x86/lib/memmove_32.S +++ b/arch/x86/lib/memmove_32.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> SYM_FUNC_START(memmove) /* diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 0559b206fb11..1b60ae81ecd8 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -6,10 +6,10 @@ * This assembly file is re-written from memmove_64.c file. * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> #undef memmove @@ -212,5 +212,5 @@ SYM_FUNC_START(__memmove) SYM_FUNC_END(__memmove) EXPORT_SYMBOL(__memmove) -SYM_FUNC_ALIAS(memmove, __memmove) +SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 7c59a704c458..0199d56cb479 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -1,10 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright 2002 Andi Kleen, SuSE Labs */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> -#include <asm/export.h> .section .noinstr.text, "ax" @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) -SYM_FUNC_ALIAS(memset, __memset) +SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/arch/x86/lib/misc.c b/arch/x86/lib/misc.c index 92cd8ecc3a2c..40b81c338ae5 100644 --- a/arch/x86/lib/misc.c +++ b/arch/x86/lib/misc.c @@ -8,7 +8,7 @@ */ int num_digits(int val) { - int m = 10; + long long m = 10; int d = 1; if (val < 0) { diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 1451e0c4ae22..975c9c18263d 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -11,13 +11,12 @@ * return an error value in addition to the "real" * return value. */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/thread_info.h> #include <asm/errno.h> #include <asm/asm.h> #include <asm/smap.h> -#include <asm/export.h> - /* * __put_user_X @@ -56,7 +55,6 @@ SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) SYM_FUNC_START(__put_user_nocheck_1) - ENDBR ASM_STAC 2: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -76,7 +74,6 @@ SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) SYM_FUNC_START(__put_user_nocheck_2) - ENDBR ASM_STAC 4: movw %ax,(%_ASM_CX) xor %ecx,%ecx @@ -96,7 +93,6 @@ SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) SYM_FUNC_START(__put_user_nocheck_4) - ENDBR ASM_STAC 6: movl %eax,(%_ASM_CX) xor %ecx,%ecx @@ -119,7 +115,6 @@ SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) SYM_FUNC_START(__put_user_nocheck_8) - ENDBR ASM_STAC 9: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 @@ -138,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception) RET SYM_CODE_END(__put_user_handle_exception) - _ASM_EXTABLE(1b, __put_user_handle_exception) - _ASM_EXTABLE(2b, __put_user_handle_exception) - _ASM_EXTABLE(3b, __put_user_handle_exception) - _ASM_EXTABLE(4b, __put_user_handle_exception) - _ASM_EXTABLE(5b, __put_user_handle_exception) - _ASM_EXTABLE(6b, __put_user_handle_exception) - _ASM_EXTABLE(7b, __put_user_handle_exception) - _ASM_EXTABLE(9b, __put_user_handle_exception) + _ASM_EXTABLE_UA(1b, __put_user_handle_exception) + _ASM_EXTABLE_UA(2b, __put_user_handle_exception) + _ASM_EXTABLE_UA(3b, __put_user_handle_exception) + _ASM_EXTABLE_UA(4b, __put_user_handle_exception) + _ASM_EXTABLE_UA(5b, __put_user_handle_exception) + _ASM_EXTABLE_UA(6b, __put_user_handle_exception) + _ASM_EXTABLE_UA(7b, __put_user_handle_exception) + _ASM_EXTABLE_UA(9b, __put_user_handle_exception) #ifdef CONFIG_X86_32 - _ASM_EXTABLE(8b, __put_user_handle_exception) - _ASM_EXTABLE(10b, __put_user_handle_exception) + _ASM_EXTABLE_UA(8b, __put_user_handle_exception) + _ASM_EXTABLE_UA(10b, __put_user_handle_exception) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index cd86aeb5fdd3..7b2589877d06 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/stringify.h> #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> #include <asm/percpu.h> @@ -126,11 +126,19 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) #include <asm/GEN-for-each-reg.h> #undef GEN #endif + +#ifdef CONFIG_RETHUNK + /* - * This function name is magical and is used by -mfunction-return=thunk-extern - * for the compiler to generate JMPs to it. + * Be careful here: that label cannot really be removed because in + * some configurations and toolchains, the JMP __x86_return_thunk the + * compiler issues is either a short one or the compiler doesn't use + * relocations for same-section JMPs and that breaks the returns + * detection logic in apply_returns() and in objtool. */ -#ifdef CONFIG_RETHUNK + .section .text..__x86.return_thunk + +#ifdef CONFIG_CPU_SRSO /* * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at @@ -147,29 +155,18 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) * * As a result, srso_alias_safe_ret() becomes a safe return. */ -#ifdef CONFIG_CPU_SRSO - .section .text..__x86.rethunk_untrain - -SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .pushsection .text..__x86.rethunk_untrain +SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) UNWIND_HINT_FUNC ANNOTATE_NOENDBR ASM_NOP2 lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) -__EXPORT_THUNK(srso_alias_untrain_ret) - - .section .text..__x86.rethunk_safe -#else -/* dummy definition for alternatives */ -SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) - ANNOTATE_UNRET_SAFE - ret - int3 -SYM_FUNC_END(srso_alias_untrain_ret) -#endif + .popsection -SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + .pushsection .text..__x86.rethunk_safe +SYM_CODE_START_NOALIGN(srso_alias_safe_ret) lea 8(%_ASM_SP), %_ASM_SP UNWIND_HINT_FUNC ANNOTATE_UNRET_SAFE @@ -177,14 +174,63 @@ SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) int3 SYM_FUNC_END(srso_alias_safe_ret) - .section .text..__x86.return_thunk - -SYM_CODE_START(srso_alias_return_thunk) +SYM_CODE_START_NOALIGN(srso_alias_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR call srso_alias_safe_ret ud2 SYM_CODE_END(srso_alias_return_thunk) + .popsection + +/* + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccc30824648d48,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. + */ + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret) + ANNOTATE_NOENDBR + .byte 0x48, 0xb8 + +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + lea 8(%_ASM_SP), %_ASM_SP + ret + int3 + int3 + /* end of movabs */ + lfence + call srso_safe_ret + ud2 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) + +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" +#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" +#else /* !CONFIG_CPU_SRSO */ +#define JMP_SRSO_UNTRAIN_RET "ud2" +#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" +#endif /* CONFIG_CPU_SRSO */ + +#ifdef CONFIG_CPU_UNRET_ENTRY /* * Some generic notes on the untraining sequences: @@ -216,7 +262,7 @@ SYM_CODE_END(srso_alias_return_thunk) */ .align 64 .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc -SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) +SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret) ANNOTATE_NOENDBR /* * As executed from retbleed_untrain_ret, this is: @@ -264,72 +310,27 @@ SYM_CODE_END(retbleed_return_thunk) jmp retbleed_return_thunk int3 SYM_FUNC_END(retbleed_untrain_ret) -__EXPORT_THUNK(retbleed_untrain_ret) -/* - * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() - * above. On kernel entry, srso_untrain_ret() is executed which is a - * - * movabs $0xccccc30824648d48,%rax - * - * and when the return thunk executes the inner label srso_safe_ret() - * later, it is a stack manipulation and a RET which is mispredicted and - * thus a "safe" one to use. - */ - .align 64 - .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc -SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) - ANNOTATE_NOENDBR - .byte 0x48, 0xb8 +#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" +#else /* !CONFIG_CPU_UNRET_ENTRY */ +#define JMP_RETBLEED_UNTRAIN_RET "ud2" +#endif /* CONFIG_CPU_UNRET_ENTRY */ -/* - * This forces the function return instruction to speculate into a trap - * (UD2 in srso_return_thunk() below). This RET will then mispredict - * and execution will continue at the return site read from the top of - * the stack. - */ -SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) - lea 8(%_ASM_SP), %_ASM_SP - ret - int3 - int3 - /* end of movabs */ - lfence - call srso_safe_ret - ud2 -SYM_CODE_END(srso_safe_ret) -SYM_FUNC_END(srso_untrain_ret) -__EXPORT_THUNK(srso_untrain_ret) - -SYM_CODE_START(srso_return_thunk) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - call srso_safe_ret - ud2 -SYM_CODE_END(srso_return_thunk) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ - "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ - "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS + ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ + JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ + JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -SYM_CODE_START(__x86_return_thunk) - UNWIND_HINT_FUNC - ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret - int3 -SYM_CODE_END(__x86_return_thunk) -EXPORT_SYMBOL(__x86_return_thunk) - -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ #ifdef CONFIG_CALL_DEPTH_TRACKING .align 64 -SYM_FUNC_START(__x86_return_skl) +SYM_FUNC_START(call_depth_return_thunk) ANNOTATE_NOENDBR /* * Keep the hotpath in a 16byte I-fetch for the non-debug @@ -356,6 +357,33 @@ SYM_FUNC_START(__x86_return_skl) ANNOTATE_UNRET_SAFE ret int3 -SYM_FUNC_END(__x86_return_skl) +SYM_FUNC_END(call_depth_return_thunk) #endif /* CONFIG_CALL_DEPTH_TRACKING */ + +/* + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + * + * This code is only used during kernel boot or module init. All + * 'JMP __x86_return_thunk' sites are changed to something else by + * apply_returns(). + * + * This should be converted eventually to call a warning function which + * should scream loudly when the default return thunk is called after + * alternatives have been applied. + * + * That warning function cannot BUG() because the bug splat cannot be + * displayed in all possible configurations, leading to users not really + * knowing why the machine froze. + */ +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(__x86_return_thunk) +EXPORT_SYMBOL(__x86_return_thunk) + +#endif /* CONFIG_RETHUNK */ |