diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 19:53:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-11 19:53:15 -0700 |
commit | 685d98211273f60e38a6d361b62d7016c545297e (patch) | |
tree | 76c3be7af88578437c72325c322a52a12be3d05d /arch/x86/lib | |
parent | fcc196579aa1fc167d6778948bff69fae6116737 (diff) | |
parent | 35ce64922c8263448e58a2b9e8d15a64e11e9b2d (diff) | |
download | lwn-685d98211273f60e38a6d361b62d7016c545297e.tar.gz lwn-685d98211273f60e38a6d361b62d7016c545297e.zip |
Merge tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core x86 updates from Ingo Molnar:
- The biggest change is the rework of the percpu code, to support the
'Named Address Spaces' GCC feature, by Uros Bizjak:
- This allows C code to access GS and FS segment relative memory
via variables declared with such attributes, which allows the
compiler to better optimize those accesses than the previous
inline assembly code.
- The series also includes a number of micro-optimizations for
various percpu access methods, plus a number of cleanups of %gs
accesses in assembly code.
- These changes have been exposed to linux-next testing for the
last ~5 months, with no known regressions in this area.
- Fix/clean up __switch_to()'s broken but accidentally working handling
of FPU switching - which also generates better code
- Propagate more RIP-relative addressing in assembly code, to generate
slightly better code
- Rework the CPU mitigations Kconfig space to be less idiosyncratic, to
make it easier for distros to follow & maintain these options
- Rework the x86 idle code to cure RCU violations and to clean up the
logic
- Clean up the vDSO Makefile logic
- Misc cleanups and fixes
* tag 'x86-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (52 commits)
x86/idle: Select idle routine only once
x86/idle: Let prefer_mwait_c1_over_halt() return bool
x86/idle: Cleanup idle_setup()
x86/idle: Clean up idle selection
x86/idle: Sanitize X86_BUG_AMD_E400 handling
sched/idle: Conditionally handle tick broadcast in default_idle_call()
x86: Increase brk randomness entropy for 64-bit systems
x86/vdso: Move vDSO to mmap region
x86/vdso/kbuild: Group non-standard build attributes and primary object file rules together
x86/vdso: Fix rethunk patching for vdso-image-{32,64}.o
x86/retpoline: Ensure default return thunk isn't used at runtime
x86/vdso: Use CONFIG_COMPAT_32 to specify vdso32
x86/vdso: Use $(addprefix ) instead of $(foreach )
x86/vdso: Simplify obj-y addition
x86/vdso: Consolidate targets and clean-files
x86/bugs: Rename CONFIG_RETHUNK => CONFIG_MITIGATION_RETHUNK
x86/bugs: Rename CONFIG_CPU_SRSO => CONFIG_MITIGATION_SRSO
x86/bugs: Rename CONFIG_CPU_IBRS_ENTRY => CONFIG_MITIGATION_IBRS_ENTRY
x86/bugs: Rename CONFIG_CPU_UNRET_ENTRY => CONFIG_MITIGATION_UNRET_ENTRY
x86/bugs: Rename CONFIG_SLS => CONFIG_MITIGATION_SLS
...
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg16b_emu.S | 12 | ||||
-rw-r--r-- | arch/x86/lib/cmpxchg8b_emu.S | 30 | ||||
-rw-r--r-- | arch/x86/lib/retpoline.S | 41 |
4 files changed, 47 insertions, 38 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index ea3a28e7b613..72cc9c90e9f3 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -49,7 +49,7 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o -lib-$(CONFIG_RETPOLINE) += retpoline.o +lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o obj-y += iomem.o diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S index 6962df315793..4fb44894ad87 100644 --- a/arch/x86/lib/cmpxchg16b_emu.S +++ b/arch/x86/lib/cmpxchg16b_emu.S @@ -23,14 +23,14 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) cli /* if (*ptr == old) */ - cmpq PER_CPU_VAR(0(%rsi)), %rax + cmpq __percpu (%rsi), %rax jne .Lnot_same - cmpq PER_CPU_VAR(8(%rsi)), %rdx + cmpq __percpu 8(%rsi), %rdx jne .Lnot_same /* *ptr = new */ - movq %rbx, PER_CPU_VAR(0(%rsi)) - movq %rcx, PER_CPU_VAR(8(%rsi)) + movq %rbx, __percpu (%rsi) + movq %rcx, __percpu 8(%rsi) /* set ZF in EFLAGS to indicate success */ orl $X86_EFLAGS_ZF, (%rsp) @@ -42,8 +42,8 @@ SYM_FUNC_START(this_cpu_cmpxchg16b_emu) /* *ptr != old */ /* old = *ptr */ - movq PER_CPU_VAR(0(%rsi)), %rax - movq PER_CPU_VAR(8(%rsi)), %rdx + movq __percpu (%rsi), %rax + movq __percpu 8(%rsi), %rdx /* clear ZF in EFLAGS to indicate failure */ andl $(~X86_EFLAGS_ZF), (%rsp) diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S index 873e4ef23e49..1c96be769adc 100644 --- a/arch/x86/lib/cmpxchg8b_emu.S +++ b/arch/x86/lib/cmpxchg8b_emu.S @@ -24,12 +24,12 @@ SYM_FUNC_START(cmpxchg8b_emu) pushfl cli - cmpl 0(%esi), %eax + cmpl (%esi), %eax jne .Lnot_same cmpl 4(%esi), %edx jne .Lnot_same - movl %ebx, 0(%esi) + movl %ebx, (%esi) movl %ecx, 4(%esi) orl $X86_EFLAGS_ZF, (%esp) @@ -38,7 +38,7 @@ SYM_FUNC_START(cmpxchg8b_emu) RET .Lnot_same: - movl 0(%esi), %eax + movl (%esi), %eax movl 4(%esi), %edx andl $(~X86_EFLAGS_ZF), (%esp) @@ -53,18 +53,30 @@ EXPORT_SYMBOL(cmpxchg8b_emu) #ifndef CONFIG_UML +/* + * Emulate 'cmpxchg8b %fs:(%rsi)' + * + * Inputs: + * %esi : memory location to compare + * %eax : low 32 bits of old value + * %edx : high 32 bits of old value + * %ebx : low 32 bits of new value + * %ecx : high 32 bits of new value + * + * Notably this is not LOCK prefixed and is not safe against NMIs + */ SYM_FUNC_START(this_cpu_cmpxchg8b_emu) pushfl cli - cmpl PER_CPU_VAR(0(%esi)), %eax + cmpl __percpu (%esi), %eax jne .Lnot_same2 - cmpl PER_CPU_VAR(4(%esi)), %edx + cmpl __percpu 4(%esi), %edx jne .Lnot_same2 - movl %ebx, PER_CPU_VAR(0(%esi)) - movl %ecx, PER_CPU_VAR(4(%esi)) + movl %ebx, __percpu (%esi) + movl %ecx, __percpu 4(%esi) orl $X86_EFLAGS_ZF, (%esp) @@ -72,8 +84,8 @@ SYM_FUNC_START(this_cpu_cmpxchg8b_emu) RET .Lnot_same2: - movl PER_CPU_VAR(0(%esi)), %eax - movl PER_CPU_VAR(4(%esi)), %edx + movl __percpu (%esi), %eax + movl __percpu 4(%esi), %edx andl $(~X86_EFLAGS_ZF), (%esp) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 7b2589877d06..721b528da9ac 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -71,7 +71,7 @@ SYM_CODE_END(__x86_indirect_thunk_array) #include <asm/GEN-for-each-reg.h> #undef GEN -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING .macro CALL_THUNK reg .align RETPOLINE_THUNK_SIZE @@ -127,7 +127,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) #undef GEN #endif -#ifdef CONFIG_RETHUNK +#ifdef CONFIG_MITIGATION_RETHUNK /* * Be careful here: that label cannot really be removed because in @@ -138,7 +138,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) */ .section .text..__x86.return_thunk -#ifdef CONFIG_CPU_SRSO +#ifdef CONFIG_MITIGATION_SRSO /* * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at @@ -225,12 +225,12 @@ SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" #define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" -#else /* !CONFIG_CPU_SRSO */ +#else /* !CONFIG_MITIGATION_SRSO */ #define JMP_SRSO_UNTRAIN_RET "ud2" #define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" -#endif /* CONFIG_CPU_SRSO */ +#endif /* CONFIG_MITIGATION_SRSO */ -#ifdef CONFIG_CPU_UNRET_ENTRY +#ifdef CONFIG_MITIGATION_UNRET_ENTRY /* * Some generic notes on the untraining sequences: @@ -312,11 +312,11 @@ SYM_CODE_END(retbleed_return_thunk) SYM_FUNC_END(retbleed_untrain_ret) #define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret" -#else /* !CONFIG_CPU_UNRET_ENTRY */ +#else /* !CONFIG_MITIGATION_UNRET_ENTRY */ #define JMP_RETBLEED_UNTRAIN_RET "ud2" -#endif /* CONFIG_CPU_UNRET_ENTRY */ +#endif /* CONFIG_MITIGATION_UNRET_ENTRY */ -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO) +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ @@ -325,9 +325,9 @@ SYM_FUNC_START(entry_untrain_ret) SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) -#endif /* CONFIG_CPU_UNRET_ENTRY || CONFIG_CPU_SRSO */ +#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */ -#ifdef CONFIG_CALL_DEPTH_TRACKING +#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING .align 64 SYM_FUNC_START(call_depth_return_thunk) @@ -359,7 +359,7 @@ SYM_FUNC_START(call_depth_return_thunk) int3 SYM_FUNC_END(call_depth_return_thunk) -#endif /* CONFIG_CALL_DEPTH_TRACKING */ +#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */ /* * This function name is magical and is used by -mfunction-return=thunk-extern @@ -369,21 +369,18 @@ SYM_FUNC_END(call_depth_return_thunk) * 'JMP __x86_return_thunk' sites are changed to something else by * apply_returns(). * - * This should be converted eventually to call a warning function which - * should scream loudly when the default return thunk is called after - * alternatives have been applied. - * - * That warning function cannot BUG() because the bug splat cannot be - * displayed in all possible configurations, leading to users not really - * knowing why the machine froze. + * The ALTERNATIVE below adds a really loud warning to catch the case + * where the insufficient default return thunk ends up getting used for + * whatever reason like miscompilation or failure of + * objtool/alternatives/etc to patch all the return sites. */ SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR - ANNOTATE_UNRET_SAFE - ret + ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ + "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) -#endif /* CONFIG_RETHUNK */ +#endif /* CONFIG_MITIGATION_RETHUNK */ |