diff options
Diffstat (limited to 'arch/x86/kernel/cpu/common.c')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 82 |
1 files changed, 56 insertions, 26 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dbc6f066e231..6ef6ed9ccca6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -430,7 +430,7 @@ void load_percpu_segment(int cpu) #ifdef CONFIG_X86_32 loadsegment(fs, __KERNEL_PERCPU); #else - loadsegment(gs, 0); + __loadsegment_simple(gs, 0); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); #endif load_stack_canary_segment(); @@ -866,30 +866,34 @@ static void detect_nopl(struct cpuinfo_x86 *c) #else set_cpu_cap(c, X86_FEATURE_NOPL); #endif +} +static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 /* - * ESPFIX is a strange bug. All real CPUs have it. Paravirt - * systems that run Linux at CPL > 0 may or may not have the - * issue, but, even if they have the issue, there's absolutely - * nothing we can do about it because we can't use the real IRET - * instruction. + * Empirically, writing zero to a segment selector on AMD does + * not clear the base, whereas writing zero to a segment + * selector on Intel does clear the base. Intel's behavior + * allows slightly faster context switches in the common case + * where GS is unused by the prev and next threads. * - * NB: For the time being, only 32-bit kernels support - * X86_BUG_ESPFIX as such. 64-bit kernels directly choose - * whether to apply espfix using paravirt hooks. If any - * non-paravirt system ever shows up that does *not* have the - * ESPFIX issue, we can change this. + * Since neither vendor documents this anywhere that I can see, + * detect it directly instead of hardcoding the choice by + * vendor. + * + * I've designated AMD's behavior as the "bug" because it's + * counterintuitive and less friendly. */ -#ifdef CONFIG_X86_32 -#ifdef CONFIG_PARAVIRT - do { - extern void native_iret(void); - if (pv_cpu_ops.iret == native_iret) - set_cpu_bug(c, X86_BUG_ESPFIX); - } while (0); -#else - set_cpu_bug(c, X86_BUG_ESPFIX); -#endif + + unsigned long old_base, tmp; + rdmsrl(MSR_FS_BASE, old_base); + wrmsrl(MSR_FS_BASE, 1); + loadsegment(fs, 0); + rdmsrl(MSR_FS_BASE, tmp); + if (tmp != 0) + set_cpu_bug(c, X86_BUG_NULL_SEG); + wrmsrl(MSR_FS_BASE, old_base); #endif } @@ -925,6 +929,33 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ detect_nopl(c); + + detect_null_seg_behavior(c); + + /* + * ESPFIX is a strange bug. All real CPUs have it. Paravirt + * systems that run Linux at CPL > 0 may or may not have the + * issue, but, even if they have the issue, there's absolutely + * nothing we can do about it because we can't use the real IRET + * instruction. + * + * NB: For the time being, only 32-bit kernels support + * X86_BUG_ESPFIX as such. 64-bit kernels directly choose + * whether to apply espfix using paravirt hooks. If any + * non-paravirt system ever shows up that does *not* have the + * ESPFIX issue, we can change this. + */ +#ifdef CONFIG_X86_32 +# ifdef CONFIG_PARAVIRT + do { + extern void native_iret(void); + if (pv_cpu_ops.iret == native_iret) + set_cpu_bug(c, X86_BUG_ESPFIX); + } while (0); +# else + set_cpu_bug(c, X86_BUG_ESPFIX); +# endif +#endif } static void x86_init_cache_qos(struct cpuinfo_x86 *c) @@ -1080,12 +1111,12 @@ void enable_sep_cpu(void) struct tss_struct *tss; int cpu; + if (!boot_cpu_has(X86_FEATURE_SEP)) + return; + cpu = get_cpu(); tss = &per_cpu(cpu_tss, cpu); - if (!boot_cpu_has(X86_FEATURE_SEP)) - goto out; - /* * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- * see the big comment in struct x86_hw_tss's definition. @@ -1100,7 +1131,6 @@ void enable_sep_cpu(void) wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); -out: put_cpu(); } #endif @@ -1532,7 +1562,7 @@ void cpu_init(void) pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || - cpu_has_tsc || + boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |