summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/head_64.S11
-rw-r--r--arch/x86/entry/common.c49
-rw-r--r--arch/x86/entry/entry_32.S5
-rw-r--r--arch/x86/entry/entry_64_compat.S31
-rw-r--r--arch/x86/events/Makefile2
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/include/asm/bitops.h6
-rw-r--r--arch/x86/include/asm/bug.h6
-rw-r--r--arch/x86/include/asm/cpu.h5
-rw-r--r--arch/x86/include/asm/cpumask.h18
-rw-r--r--arch/x86/include/asm/fpu/internal.h5
-rw-r--r--arch/x86/include/asm/idtentry.h47
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h2
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/kernel/cpu/centaur.c1
-rw-r--r--arch/x86/kernel/cpu/common.c24
-rw-r--r--arch/x86/kernel/cpu/cpu.h4
-rw-r--r--arch/x86/kernel/cpu/intel.c11
-rw-r--r--arch/x86/kernel/cpu/mce/core.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c8
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h1
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c1
-rw-r--r--arch/x86/kernel/cpu/umwait.c6
-rw-r--r--arch/x86/kernel/cpu/zhaoxin.c1
-rw-r--r--arch/x86/kernel/fpu/core.c6
-rw-r--r--arch/x86/kernel/ldt.c26
-rw-r--r--arch/x86/kernel/nmi.c2
-rw-r--r--arch/x86/kernel/traps.c92
-rw-r--r--arch/x86/kvm/lapic.c50
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c4
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h16
-rw-r--r--arch/x86/kvm/svm/svm.c2
-rw-r--r--arch/x86/kvm/vmx/vmcs.h32
-rw-r--r--arch/x86/kvm/vmx/vmx.c27
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/lib/memcpy_64.S4
-rw-r--r--arch/x86/lib/usercopy_64.c1
-rw-r--r--arch/x86/power/cpu.c6
-rw-r--r--arch/x86/xen/enlighten_pv.c28
-rw-r--r--arch/x86/xen/xen-asm_64.S25
45 files changed, 382 insertions, 215 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6a0cc524882d..883da0abf779 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -67,7 +67,7 @@ config X86
select ARCH_HAS_FILTER_PGPROT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_KCOV if X86_64
+ select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION
select ARCH_HAS_MEM_ENCRYPT
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index e821a7d7d5c4..97d37f0a34f5 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -213,7 +213,6 @@ SYM_FUNC_START(startup_32)
* We place all of the values on our mini stack so lret can
* used to perform that far jump.
*/
- pushl $__KERNEL_CS
leal startup_64(%ebp), %eax
#ifdef CONFIG_EFI_MIXED
movl efi32_boot_args(%ebp), %edi
@@ -224,11 +223,20 @@ SYM_FUNC_START(startup_32)
movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer
cmpl $0, %edx
jnz 1f
+ /*
+ * efi_pe_entry uses MS calling convention, which requires 32 bytes of
+ * shadow space on the stack even if all arguments are passed in
+ * registers. We also need an additional 8 bytes for the space that
+ * would be occupied by the return address, and this also results in
+ * the correct stack alignment for entry.
+ */
+ subl $40, %esp
leal efi_pe_entry(%ebp), %eax
movl %edi, %ecx // MS calling convention
movl %esi, %edx
1:
#endif
+ pushl $__KERNEL_CS
pushl %eax
/* Enter paged protected Mode, activating Long Mode */
@@ -784,6 +792,7 @@ SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
+ .balign 16
SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
/*
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bd3f14175193..e83b3f14897c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -45,6 +45,32 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+/* Check that the stack and regs on entry from user mode are sane. */
+static void check_user_regs(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
+ /*
+ * Make sure that the entry code gave us a sensible EFLAGS
+ * register. Native because we want to check the actual CPU
+ * state, not the interrupt state as imagined by Xen.
+ */
+ unsigned long flags = native_save_fl();
+ WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF |
+ X86_EFLAGS_NT));
+
+ /* We think we came from user mode. Make sure pt_regs agrees. */
+ WARN_ON_ONCE(!user_mode(regs));
+
+ /*
+ * All entries from user mode (except #DF) should be on the
+ * normal thread stack and should have user pt_regs in the
+ * correct location.
+ */
+ WARN_ON_ONCE(!on_thread_stack());
+ WARN_ON_ONCE(regs != task_pt_regs(current));
+ }
+}
+
#ifdef CONFIG_CONTEXT_TRACKING
/**
* enter_from_user_mode - Establish state when coming from user mode
@@ -127,9 +153,6 @@ static long syscall_trace_enter(struct pt_regs *regs)
unsigned long ret = 0;
u32 work;
- if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
- BUG_ON(regs != task_pt_regs(current));
-
work = READ_ONCE(ti->flags);
if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
@@ -346,6 +369,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
struct thread_info *ti;
+ check_user_regs(regs);
+
enter_from_user_mode();
instrumentation_begin();
@@ -409,6 +434,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */
__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
{
+ check_user_regs(regs);
+
enter_from_user_mode();
instrumentation_begin();
@@ -460,6 +487,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
vdso_image_32.sym_int80_landing_pad;
bool success;
+ check_user_regs(regs);
+
/*
* SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
* so that 'regs->ip -= 2' lands back on an int $0x80 instruction.
@@ -510,6 +539,18 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
#endif
}
+
+/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
+__visible noinstr long do_SYSENTER_32(struct pt_regs *regs)
+{
+ /* SYSENTER loses RSP, but the vDSO saved it in RBP. */
+ regs->sp = regs->bp;
+
+ /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */
+ regs->flags |= X86_EFLAGS_IF;
+
+ return do_fast_syscall_32(regs);
+}
#endif
SYSCALL_DEFINE0(ni_syscall)
@@ -553,6 +594,7 @@ SYSCALL_DEFINE0(ni_syscall)
bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs)
{
if (user_mode(regs)) {
+ check_user_regs(regs);
enter_from_user_mode();
return false;
}
@@ -686,6 +728,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit)
*/
void noinstr idtentry_enter_user(struct pt_regs *regs)
{
+ check_user_regs(regs);
enter_from_user_mode();
}
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 024d7d276cd4..2d0bd5d5f032 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -933,9 +933,8 @@ SYM_FUNC_START(entry_SYSENTER_32)
.Lsysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
- pushl %ebp /* pt_regs->sp (stashed in bp) */
+ pushl $0 /* pt_regs->sp (placeholder) */
pushfl /* pt_regs->flags (except IF = 0) */
- orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
pushl %eax /* pt_regs->orig_ax */
@@ -965,7 +964,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
.Lsysenter_flags_fixed:
movl %esp, %eax
- call do_fast_syscall_32
+ call do_SYSENTER_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 0f974ae01e62..541fdaf64045 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -57,29 +57,30 @@ SYM_CODE_START(entry_SYSENTER_compat)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- /*
- * User tracing code (ptrace or signal handlers) might assume that
- * the saved RAX contains a 32-bit number when we're invoking a 32-bit
- * syscall. Just in case the high bits are nonzero, zero-extend
- * the syscall number. (This could almost certainly be deleted
- * with no ill effects.)
- */
- movl %eax, %eax
-
/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
- pushq %rbp /* pt_regs->sp (stashed in bp) */
+ pushq $0 /* pt_regs->sp = 0 (placeholder) */
/*
* Push flags. This is nasty. First, interrupts are currently
- * off, but we need pt_regs->flags to have IF set. Second, even
- * if TF was set when SYSENTER started, it's clear by now. We fix
- * that later using TIF_SINGLESTEP.
+ * off, but we need pt_regs->flags to have IF set. Second, if TS
+ * was set in usermode, it's still set, and we're singlestepping
+ * through this code. do_SYSENTER_32() will fix up IF.
*/
pushfq /* pt_regs->flags (except IF = 0) */
- orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq $0 /* pt_regs->ip = 0 (placeholder) */
+SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
+
+ /*
+ * User tracing code (ptrace or signal handlers) might assume that
+ * the saved RAX contains a 32-bit number when we're invoking a 32-bit
+ * syscall. Just in case the high bits are nonzero, zero-extend
+ * the syscall number. (This could almost certainly be deleted
+ * with no ill effects.)
+ */
+ movl %eax, %eax
+
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
@@ -135,7 +136,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
.Lsysenter_flags_fixed:
movq %rsp, %rdi
- call do_fast_syscall_32
+ call do_SYSENTER_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 12c42eba77ec..9933c0e8e97a 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += core.o probe.o
-obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o
+obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o
obj-y += amd/
obj-$(CONFIG_X86_LOCAL_APIC) += msr.o
obj-$(CONFIG_CPU_SUP_INTEL) += intel/
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a54c6a401581..6035df1b49e1 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -375,7 +375,10 @@ void __init hyperv_init(void)
guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
- hv_hypercall_pg = vmalloc_exec(PAGE_SIZE);
+ hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
+ VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
goto remove_cpuhp_state;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 35460fef39b8..0367efdc5b7a 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -201,12 +201,8 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr)
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
-static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
+static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
- /*
- * Because this is a plain access, we need to disable KCSAN here to
- * avoid double instrumentation via instrumented bitops.
- */
return ((1UL << (nr & (BITS_PER_LONG-1))) &
(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index fb34ff641e0a..028189575560 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -75,6 +75,12 @@ do { \
unreachable(); \
} while (0)
+/*
+ * This instrumentation_begin() is strictly speaking incorrect; but it
+ * suppresses the complaints from WARN()s in noinstr code. If such a WARN()
+ * were to trigger, we'd rather wreck the machine in an attempt to get the
+ * message out than not know about it.
+ */
#define __WARN_FLAGS(flags) \
do { \
instrumentation_begin(); \
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index dd17c2da1af5..da78ccbd493b 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip)
return false;
}
#endif
+#ifdef CONFIG_IA32_FEAT_CTL
+void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
+#else
+static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {}
+#endif
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 6722ffcef2e6..3afa990d756b 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -11,5 +11,23 @@ extern cpumask_var_t cpu_sibling_setup_mask;
extern void setup_cpu_local_masks(void);
+/*
+ * NMI and MCE exceptions need cpu_is_offline() _really_ early,
+ * provide an arch_ special for them to avoid instrumentation.
+ */
+#if NR_CPUS > 1
+static __always_inline bool arch_cpu_online(int cpu)
+{
+ return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
+}
+#else
+static __always_inline bool arch_cpu_online(int cpu)
+{
+ return cpu == 0;
+}
+#endif
+
+#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 42159f45bf9c..845e7481ab77 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -623,6 +623,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
* MXCSR and XCR definitions:
*/
+static inline void ldmxcsr(u32 mxcsr)
+{
+ asm volatile("ldmxcsr %0" :: "m" (mxcsr));
+}
+
extern unsigned int mxcsr_feature_mask;
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index cf51c50eb356..eeac6dc2adaa 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -353,10 +353,6 @@ static __always_inline void __##func(struct pt_regs *regs)
#else /* CONFIG_X86_64 */
-/* Maps to a regular IDTENTRY on 32bit for now */
-# define DECLARE_IDTENTRY_IST DECLARE_IDTENTRY
-# define DEFINE_IDTENTRY_IST DEFINE_IDTENTRY
-
/**
* DECLARE_IDTENTRY_DF - Declare functions for double fault 32bit variant
* @vector: Vector number (ignored for C)
@@ -387,28 +383,18 @@ __visible noinstr void func(struct pt_regs *regs, \
#endif /* !CONFIG_X86_64 */
/* C-Code mapping */
+#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
+#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
+
+#ifdef CONFIG_X86_64
#define DECLARE_IDTENTRY_MCE DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_MCE_USER DEFINE_IDTENTRY_NOIST
-#define DECLARE_IDTENTRY_NMI DECLARE_IDTENTRY_RAW
-#define DEFINE_IDTENTRY_NMI DEFINE_IDTENTRY_RAW
-
#define DECLARE_IDTENTRY_DEBUG DECLARE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG DEFINE_IDTENTRY_IST
#define DEFINE_IDTENTRY_DEBUG_USER DEFINE_IDTENTRY_NOIST
-
-/**
- * DECLARE_IDTENTRY_XEN - Declare functions for XEN redirect IDT entry points
- * @vector: Vector number (ignored for C)
- * @func: Function name of the entry point
- *
- * Used for xennmi and xendebug redirections. No DEFINE as this is all ASM
- * indirection magic.
- */
-#define DECLARE_IDTENTRY_XEN(vector, func) \
- asmlinkage void xen_asm_exc_xen##func(void); \
- asmlinkage void asm_exc_xen##func(void)
+#endif
#else /* !__ASSEMBLY__ */
@@ -455,9 +441,6 @@ __visible noinstr void func(struct pt_regs *regs, \
# define DECLARE_IDTENTRY_MCE(vector, func) \
DECLARE_IDTENTRY(vector, func)
-# define DECLARE_IDTENTRY_DEBUG(vector, func) \
- DECLARE_IDTENTRY(vector, func)
-
/* No ASM emitted for DF as this goes through a C shim */
# define DECLARE_IDTENTRY_DF(vector, func)
@@ -469,10 +452,6 @@ __visible noinstr void func(struct pt_regs *regs, \
/* No ASM code emitted for NMI */
#define DECLARE_IDTENTRY_NMI(vector, func)
-/* XEN NMI and DB wrapper */
-#define DECLARE_IDTENTRY_XEN(vector, func) \
- idtentry vector asm_exc_xen##func exc_##func has_error_code=0
-
/*
* ASM code to emit the common vector entry stubs where each stub is
* packed into 8 bytes.
@@ -565,16 +544,28 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3);
DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault);
#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_MC, exc_machine_check);
+#endif
#endif
/* NMI */
DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi);
-DECLARE_IDTENTRY_XEN(X86_TRAP_NMI, nmi);
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_NMI, xenpv_exc_nmi);
+#endif
/* #DB */
+#ifdef CONFIG_X86_64
DECLARE_IDTENTRY_DEBUG(X86_TRAP_DB, exc_debug);
-DECLARE_IDTENTRY_XEN(X86_TRAP_DB, debug);
+#else
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB, exc_debug);
+#endif
+#ifdef CONFIG_XEN_PV
+DECLARE_IDTENTRY_RAW(X86_TRAP_DB, xenpv_exc_debug);
+#endif
/* #DF */
DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8998e97457f..be5363b21540 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -943,7 +943,7 @@ struct kvm_arch {
atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock;
struct kvm_apic_map *apic_map;
- bool apic_map_dirty;
+ atomic_t apic_map_dirty;
bool apic_access_page_done;
unsigned long apicv_inhibit_reasons;
@@ -1220,7 +1220,7 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
- int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+ int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 73d997aa2966..e039a933aca3 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -25,8 +25,6 @@
#define TPAUSE_C01_STATE 1
#define TPAUSE_C02_STATE 0
-u32 get_umwait_control_msr(void);
-
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
{
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 2da1f95b88d7..816b31c68550 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -194,6 +194,7 @@ enum page_cache_mode {
#define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0)
#define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC)
#define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G)
+#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G)
#define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC)
#define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G)
#define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G)
@@ -219,6 +220,7 @@ enum page_cache_mode {
#define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC)
#define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC)
#define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0)
+#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC)
#define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC)
#define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 42cd333616c4..03b7c4ca425a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -370,7 +370,7 @@ struct x86_hw_tss {
#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
- unsigned long words[64];
+ char stack[PAGE_SIZE];
};
struct entry_stack_page {
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 426792565d86..c5cf336e5077 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -3,6 +3,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
#include <asm/mtrr.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 043d93cdcaad..95c090a45b4b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -347,6 +347,9 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+/* These bits should not change their value after CPU init is finished. */
+static const unsigned long cr4_pinned_mask =
+ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE;
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0);
void native_write_cr4(unsigned long val)
{
- unsigned long bits_missing = 0;
+ unsigned long bits_changed = 0;
set_register:
asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
if (static_branch_likely(&cr_pinning)) {
- if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
- bits_missing = ~val & cr4_pinned_bits;
- val |= bits_missing;
+ if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) {
+ bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits;
+ val = (val & ~cr4_pinned_mask) | cr4_pinned_bits;
goto set_register;
}
- /* Warn after we've set the missing bits. */
- WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
- bits_missing);
+ /* Warn after we've corrected the changed bits. */
+ WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n",
+ bits_changed);
}
}
#if IS_MODULE(CONFIG_LKDTM)
@@ -419,7 +422,7 @@ void cr4_init(void)
if (boot_cpu_has(X86_FEATURE_PCID))
cr4 |= X86_CR4_PCIDE;
if (static_branch_likely(&cr_pinning))
- cr4 |= cr4_pinned_bits;
+ cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits;
__write_cr4(cr4);
@@ -434,10 +437,7 @@ void cr4_init(void)
*/
static void __init setup_cr_pinning(void)
{
- unsigned long mask;
-
- mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
- cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask;
static_key_enable(&cr_pinning.key);
}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index fb538fccd24c..9d033693519a 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -81,8 +81,4 @@ extern void update_srbds_msr(void);
extern u64 x86_read_arch_cap_msr(void);
-#ifdef CONFIG_IA32_FEAT_CTL
-void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
-#endif
-
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c25a67a34bd3..0ab48f1cdf84 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -50,6 +50,13 @@ static enum split_lock_detect_state sld_state __ro_after_init = sld_off;
static u64 msr_test_ctrl_cache __ro_after_init;
/*
+ * With a name like MSR_TEST_CTL it should go without saying, but don't touch
+ * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it
+ * on CPUs that do not support SLD can cause fireworks, even when writing '0'.
+ */
+static bool cpu_model_supports_sld __ro_after_init;
+
+/*
* Processors which have self-snooping capability can handle conflicting
* memory type across CPUs by snooping its own cache. However, there exists
* CPU models in which having conflicting memory types still leads to
@@ -1071,7 +1078,8 @@ static void sld_update_msr(bool on)
static void split_lock_init(void)
{
- split_lock_verify_msr(sld_state != sld_off);
+ if (cpu_model_supports_sld)
+ split_lock_verify_msr(sld_state != sld_off);
}
static void split_lock_warn(unsigned long ip)
@@ -1177,5 +1185,6 @@ void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
return;
}
+ cpu_model_supports_sld = true;
split_lock_setup();
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index ce9120c4f740..14e4b4d17ee5 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1083,7 +1083,7 @@ static noinstr bool mce_check_crashing_cpu(void)
{
unsigned int cpu = smp_processor_id();
- if (cpu_is_offline(cpu) ||
+ if (arch_cpu_is_offline(cpu) ||
(crashing_cpu != -1 && crashing_cpu != cpu)) {
u64 mcgstatus;
@@ -1901,6 +1901,8 @@ void (*machine_check_vector)(struct pt_regs *) = unexpected_machine_check;
static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
{
+ WARN_ON_ONCE(user_mode(regs));
+
/*
* Only required when from kernel mode. See
* mce_check_crashing_cpu() for details.
@@ -1954,7 +1956,7 @@ DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
}
#else
/* 32bit unified entry point */
-DEFINE_IDTENTRY_MCE(exc_machine_check)
+DEFINE_IDTENTRY_RAW(exc_machine_check)
{
unsigned long dr7;
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 12f967c6b603..6a9df71c1b9e 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -981,10 +981,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c)
c->x86_cache_max_rmid = ecx;
c->x86_cache_occ_scale = ebx;
- if (c->x86_vendor == X86_VENDOR_INTEL)
- c->x86_cache_mbm_width_offset = eax & 0xff;
- else
- c->x86_cache_mbm_width_offset = -1;
+ c->x86_cache_mbm_width_offset = eax & 0xff;
+
+ if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset)
+ c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD;
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index f20a47d120b1..5ffa32256b3b 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -37,6 +37,7 @@
#define MBA_IS_LINEAR 0x4
#define MBA_MAX_MBPS U32_MAX
#define MAX_MBA_BW_AMD 0x800
+#define MBM_CNTR_WIDTH_OFFSET_AMD 20
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 23b4b61319d3..3f844f14fc0a 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1117,6 +1117,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) {
_r_cdp = NULL;
+ _d_cdp = NULL;
ret = -EINVAL;
}
diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 300e3fd5ade3..ec8064c0ae03 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -18,12 +18,6 @@
*/
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
-u32 get_umwait_control_msr(void)
-{
- return umwait_control_cached;
-}
-EXPORT_SYMBOL_GPL(get_umwait_control_msr);
-
/*
* Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
* hardware or BIOS before kernel boot.
diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c
index df1358ba622b..05fa4ef63490 100644
--- a/arch/x86/kernel/cpu/zhaoxin.c
+++ b/arch/x86/kernel/cpu/zhaoxin.c
@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 06c818967bb6..15247b96c6ea 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,6 +101,12 @@ void kernel_fpu_begin(void)
copy_fpregs_to_fpstate(&current->thread.fpu);
}
__cpu_invalidate_fpregs_state();
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ ldmxcsr(MXCSR_DEFAULT);
+
+ if (boot_cpu_has(X86_FEATURE_FPU))
+ asm volatile ("fninit");
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 8748321c4486..b8aee71840ae 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -29,6 +29,8 @@
#include <asm/mmu_context.h>
#include <asm/pgtable_areas.h>
+#include <xen/xen.h>
+
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
@@ -543,6 +545,28 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
return bytecount;
}
+static bool allow_16bit_segments(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_16BIT))
+ return false;
+
+#ifdef CONFIG_XEN_PV
+ /*
+ * Xen PV does not implement ESPFIX64, which means that 16-bit
+ * segments will not work correctly. Until either Xen PV implements
+ * ESPFIX64 and can signal this fact to the guest or unless someone
+ * provides compelling evidence that allowing broken 16-bit segments
+ * is worthwhile, disallow 16-bit segments under Xen PV.
+ */
+ if (xen_pv_domain()) {
+ pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n");
+ return false;
+ }
+#endif
+
+ return true;
+}
+
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct *mm = current->mm;
@@ -574,7 +598,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
/* The user wants to clear the entry. */
memset(&ldt, 0, sizeof(ldt));
} else {
- if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+ if (!ldt_info.seg_32bit && !allow_16bit_segments()) {
error = -EINVAL;
goto out;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2de365f15684..d7c5e44b26f7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -478,7 +478,7 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
- if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id()))
+ if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index f9727b96961f..b038695f36c5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,17 +84,16 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
local_irq_disable();
}
-int is_valid_bugaddr(unsigned long addr)
+__always_inline int is_valid_bugaddr(unsigned long addr)
{
- unsigned short ud;
-
if (addr < TASK_SIZE_MAX)
return 0;
- if (get_kernel_nofault(ud, (unsigned short *)addr))
- return 0;
-
- return ud == INSN_UD0 || ud == INSN_UD2;
+ /*
+ * We got #UD, if the text isn't readable we'd have gotten
+ * a different exception.
+ */
+ return *(unsigned short *)addr == INSN_UD2;
}
static nokprobe_inline int
@@ -216,40 +215,45 @@ static inline void handle_invalid_op(struct pt_regs *regs)
ILL_ILLOPN, error_get_trap_addr(regs));
}
-DEFINE_IDTENTRY_RAW(exc_invalid_op)
+static noinstr bool handle_bug(struct pt_regs *regs)
{
- bool rcu_exit;
+ bool handled = false;
+
+ if (!is_valid_bugaddr(regs->ip))
+ return handled;
/*
- * Handle BUG/WARN like NMIs instead of like normal idtentries:
- * if we bugged/warned in a bad RCU context, for example, the last
- * thing we want is to BUG/WARN again in the idtentry code, ad
- * infinitum.
+ * All lies, just get the WARN/BUG out.
+ */
+ instrumentation_begin();
+ /*
+ * Since we're emulating a CALL with exceptions, restore the interrupt
+ * state to what it was at the exception site.
*/
- if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) {
- enum bug_trap_type type;
+ if (regs->flags & X86_EFLAGS_IF)
+ raw_local_irq_enable();
+ if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
+ regs->ip += LEN_UD2;
+ handled = true;
+ }
+ if (regs->flags & X86_EFLAGS_IF)
+ raw_local_irq_disable();
+ instrumentation_end();
- nmi_enter();
- instrumentation_begin();
- trace_hardirqs_off_finish();
- type = report_bug(regs->ip, regs);
- if (regs->flags & X86_EFLAGS_IF)
- trace_hardirqs_on_prepare();
- instrumentation_end();
- nmi_exit();
+ return handled;
+}
- if (type == BUG_TRAP_TYPE_WARN) {
- /* Skip the ud2. */
- regs->ip += LEN_UD2;
- return;
- }
+DEFINE_IDTENTRY_RAW(exc_invalid_op)
+{
+ bool rcu_exit;
- /*
- * Else, if this was a BUG and report_bug returns or if this
- * was just a normal #UD, we want to continue onward and
- * crash.
- */
- }
+ /*
+ * We use UD2 as a short encoding for 'CALL __WARN', as such
+ * handle it before exception entry to avoid recursive WARN
+ * in case exception entry is the one triggering WARNs.
+ */
+ if (!user_mode(regs) && handle_bug(regs))
+ return;
rcu_exit = idtentry_enter_cond_rcu(regs);
instrumentation_begin();
@@ -691,13 +695,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
(struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
/* Copy the IRET target to the temporary storage. */
- memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
+ __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8);
/* Copy the remainder of the stack from the current stack. */
- memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
+ __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip));
/* Update the entry stack */
- memcpy(new_stack, &tmp, sizeof(tmp));
+ __memcpy(new_stack, &tmp, sizeof(tmp));
BUG_ON(!user_mode(&new_stack->regs));
return new_stack;
@@ -866,6 +870,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
trace_hardirqs_off_finish();
/*
+ * If something gets miswired and we end up here for a user mode
+ * #DB, we will malfunction.
+ */
+ WARN_ON_ONCE(user_mode(regs));
+
+ /*
* Catch SYSENTER with TF set and clear DR_STEP. If this hit a
* watchpoint at the same time then that will still be handled.
*/
@@ -883,6 +893,12 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs,
static __always_inline void exc_debug_user(struct pt_regs *regs,
unsigned long dr6)
{
+ /*
+ * If something gets miswired and we end up here for a kernel mode
+ * #DB, we will malfunction.
+ */
+ WARN_ON_ONCE(!user_mode(regs));
+
idtentry_enter_user(regs);
instrumentation_begin();
@@ -913,7 +929,7 @@ DEFINE_IDTENTRY_DEBUG_USER(exc_debug)
}
#else
/* 32 bit does not have separate entry points. */
-DEFINE_IDTENTRY_DEBUG(exc_debug)
+DEFINE_IDTENTRY_RAW(exc_debug)
{
unsigned long dr6, dr7;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 34a7e0533dad..5bf72fc86a8e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -169,6 +169,18 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
kvfree(map);
}
+/*
+ * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
+ *
+ * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
+ * apic_map_lock_held.
+ */
+enum {
+ CLEAN,
+ UPDATE_IN_PROGRESS,
+ DIRTY
+};
+
void kvm_recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -176,17 +188,17 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
int i;
u32 max_id = 255; /* enough space for any xAPIC ID */
- if (!kvm->arch.apic_map_dirty) {
- /*
- * Read kvm->arch.apic_map_dirty before
- * kvm->arch.apic_map
- */
- smp_rmb();
+ /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
+ if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
return;
- }
mutex_lock(&kvm->arch.apic_map_lock);
- if (!kvm->arch.apic_map_dirty) {
+ /*
+ * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
+ * (if clean) or the APIC registers (if dirty).
+ */
+ if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
+ DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
/* Someone else has updated the map. */
mutex_unlock(&kvm->arch.apic_map_lock);
return;
@@ -256,11 +268,11 @@ out:
lockdep_is_held(&kvm->arch.apic_map_lock));
rcu_assign_pointer(kvm->arch.apic_map, new);
/*
- * Write kvm->arch.apic_map before
- * clearing apic->apic_map_dirty
+ * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
+ * If another update has come in, leave it DIRTY.
*/
- smp_wmb();
- kvm->arch.apic_map_dirty = false;
+ atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
+ UPDATE_IN_PROGRESS, CLEAN);
mutex_unlock(&kvm->arch.apic_map_lock);
if (old)
@@ -282,20 +294,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
else
static_key_slow_inc(&apic_sw_disabled.key);
- apic->vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
}
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
kvm_lapic_set_reg(apic, APIC_ID, id << 24);
- apic->vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
kvm_lapic_set_reg(apic, APIC_LDR, id);
- apic->vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
@@ -311,7 +323,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
- apic->vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
@@ -1976,7 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_DFR:
if (!apic_x2apic_mode(apic)) {
kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
- apic->vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
} else
ret = 1;
break;
@@ -2232,7 +2244,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
static_key_slow_dec_deferred(&apic_hw_disabled);
} else {
static_key_slow_inc(&apic_hw_disabled.key);
- vcpu->kvm->arch.apic_map_dirty = true;
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
}
@@ -2273,7 +2285,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!apic)
return;
- vcpu->kvm->arch.apic_map_dirty = false;
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2567,6 +2578,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
}
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
+ atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
kvm_recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 0ad06bfe2c2c..444bb9c54548 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -222,7 +222,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int kvm_mmu_post_init_vm(struct kvm *kvm);
void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fdd05c233308..76817d13c86e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1745,10 +1745,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
* Emulate arch specific page modification logging for the
* nested hypervisor
*/
-int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
+int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa)
{
if (kvm_x86_ops.write_log_dirty)
- return kvm_x86_ops.write_log_dirty(vcpu);
+ return kvm_x86_ops.write_log_dirty(vcpu, l2_gpa);
return 0;
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index a6d484ea110b..bd70ece1ef8b 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -235,7 +235,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte)
static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu,
struct guest_walker *walker,
- int write_fault)
+ gpa_t addr, int write_fault)
{
unsigned level, index;
pt_element_t pte, orig_pte;
@@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
!(pte & PT_GUEST_DIRTY_MASK)) {
trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
#if PTTYPE == PTTYPE_EPT
- if (kvm_arch_write_log_dirty(vcpu))
+ if (kvm_arch_write_log_dirty(vcpu, addr))
return -EINVAL;
#endif
pte |= PT_GUEST_DIRTY_MASK;
@@ -360,7 +360,6 @@ retry_walk:
++walker->level;
do {
- gfn_t real_gfn;
unsigned long host_addr;
pt_access = pte_access;
@@ -375,7 +374,7 @@ retry_walk:
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
+ real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
nested_access,
&walker->fault);
@@ -389,12 +388,10 @@ retry_walk:
* information to fix the exit_qualification or exit_info_1
* fields.
*/
- if (unlikely(real_gfn == UNMAPPED_GVA))
+ if (unlikely(real_gpa == UNMAPPED_GVA))
return 0;
- real_gfn = gpa_to_gfn(real_gfn);
-
- host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn,
+ host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
&walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
@@ -457,7 +454,8 @@ retry_walk:
(PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT);
if (unlikely(!accessed_dirty)) {
- ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+ ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker,
+ addr, write_fault);
if (unlikely(ret < 0))
goto error;
else if (ret)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8ccfa4197d9c..c0da4dd78ac5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3344,7 +3344,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
fastpath_t exit_fastpath;
struct vcpu_svm *svm = to_svm(vcpu);
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 5c0ff80b85c0..7a3675fddec2 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -72,11 +72,24 @@ struct loaded_vmcs {
struct vmcs_controls_shadow controls_shadow;
};
+static inline bool is_intr_type(u32 intr_info, u32 type)
+{
+ const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
+
+ return (intr_info & mask) == (INTR_INFO_VALID_MASK | type);
+}
+
+static inline bool is_intr_type_n(u32 intr_info, u32 type, u8 vector)
+{
+ const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK |
+ INTR_INFO_VECTOR_MASK;
+
+ return (intr_info & mask) == (INTR_INFO_VALID_MASK | type | vector);
+}
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
- INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK);
+ return is_intr_type_n(intr_info, INTR_TYPE_HARD_EXCEPTION, vector);
}
static inline bool is_debug(u32 intr_info)
@@ -106,28 +119,23 @@ static inline bool is_gp_fault(u32 intr_info)
static inline bool is_machine_check(u32 intr_info)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
- INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
+ return is_exception_n(intr_info, MC_VECTOR);
}
/* Undocumented: icebp/int1 */
static inline bool is_icebp(u32 intr_info)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
- == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+ return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
}
static inline bool is_nmi(u32 intr_info)
{
- return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
- == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
+ return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
}
static inline bool is_external_intr(u32 intr_info)
{
- return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
- == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR);
+ return is_intr_type(intr_info, INTR_TYPE_EXT_INTR);
}
enum vmcs_field_width {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 36c771728c8c..cb22f33bf1d8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6606,23 +6606,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
-{
- u32 host_umwait_control;
-
- if (!vmx_has_waitpkg(vmx))
- return;
-
- host_umwait_control = get_umwait_control_msr();
-
- if (vmx->msr_ia32_umwait_control != host_umwait_control)
- add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
- vmx->msr_ia32_umwait_control,
- host_umwait_control, false);
- else
- clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
-}
-
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6728,9 +6711,7 @@ reenter_guest:
pt_guest_enter(vmx);
- if (vcpu_to_pmu(vcpu)->version)
- atomic_switch_perf_msrs(vmx);
- atomic_switch_umwait_control_msr(vmx);
+ atomic_switch_perf_msrs(vmx);
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
@@ -7501,11 +7482,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
kvm_flush_pml_buffers(kvm);
}
-static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
+static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
struct vmcs12 *vmcs12;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- gpa_t gpa, dst;
+ gpa_t dst;
if (is_guest_mode(vcpu)) {
WARN_ON_ONCE(vmx->nested.pml_full);
@@ -7524,7 +7505,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
return 1;
}
- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
+ gpa &= ~0xFFFull;
dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 8a83b5edc820..639798e4a6ca 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -288,8 +288,6 @@ struct vcpu_vmx {
u64 current_tsc_ratio;
- u32 host_pkru;
-
unsigned long host_debugctlmsr;
/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00c88c2f34e4..3b92db412335 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2856,7 +2856,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_write(vcpu, msr, data);
case MSR_IA32_TSCDEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
@@ -3196,7 +3196,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_APICBASE:
msr_info->data = kvm_get_apic_base(vcpu);
break;
- case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
+ case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
case MSR_IA32_TSCDEADLINE:
msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
@@ -4603,7 +4603,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
user_tsc_khz = (u32)arg;
- if (user_tsc_khz >= kvm_max_guest_tsc_khz)
+ if (kvm_has_tsc_control &&
+ user_tsc_khz >= kvm_max_guest_tsc_khz)
goto out;
if (user_tsc_khz == 0)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 56b243b14c3a..bbcc05bcefad 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -8,6 +8,8 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
+.pushsection .noinstr.text, "ax"
+
/*
* We build a jump to memcpy_orig by default which gets NOPped out on
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
@@ -184,6 +186,8 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
retq
SYM_FUNC_END(memcpy_orig)
+.popsection
+
#ifndef CONFIG_UML
MCSAFE_TEST_CTL
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index fff28c6f73a2..b0dfac3d3df7 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
+ " .align 16\n"
"0: movq $0,(%[dst])\n"
" addq $8,%[dst]\n"
" decl %%ecx ; jnz 0b\n"
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 7c65102debaf..db1378c6ff26 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -193,6 +193,8 @@ static void fix_processor_context(void)
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
+ struct cpuinfo_x86 *c;
+
if (ctxt->misc_enable_saved)
wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable);
/*
@@ -263,6 +265,10 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
mtrr_bp_restore();
perf_restore_debug_store();
msr_restore_context(ctxt);
+
+ c = &cpu_data(smp_processor_id());
+ if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL))
+ init_ia32_feat_ctl(c);
}
/* Needed by apm.c */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index acc49fa6a097..0d68948c82ad 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -598,6 +598,26 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
}
#ifdef CONFIG_X86_64
+void noist_exc_debug(struct pt_regs *regs);
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_nmi)
+{
+ /* On Xen PV, NMI doesn't use IST. The C part is the sane as native. */
+ exc_nmi(regs);
+}
+
+DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
+{
+ /*
+ * There's no IST on Xen PV, but we still need to dispatch
+ * to the correct handler.
+ */
+ if (user_mode(regs))
+ noist_exc_debug(regs);
+ else
+ exc_debug(regs);
+}
+
struct trap_array_entry {
void (*orig)(void);
void (*xen)(void);
@@ -609,18 +629,18 @@ struct trap_array_entry {
.xen = xen_asm_##func, \
.ist_okay = ist_ok }
-#define TRAP_ENTRY_REDIR(func, xenfunc, ist_ok) { \
+#define TRAP_ENTRY_REDIR(func, ist_ok) { \
.orig = asm_##func, \
- .xen = xen_asm_##xenfunc, \
+ .xen = xen_asm_xenpv_##func, \
.ist_okay = ist_ok }
static struct trap_array_entry trap_array[] = {
- TRAP_ENTRY_REDIR(exc_debug, exc_xendebug, true ),
+ TRAP_ENTRY_REDIR(exc_debug, true ),
TRAP_ENTRY(exc_double_fault, true ),
#ifdef CONFIG_X86_MCE
TRAP_ENTRY(exc_machine_check, true ),
#endif
- TRAP_ENTRY_REDIR(exc_nmi, exc_xennmi, true ),
+ TRAP_ENTRY_REDIR(exc_nmi, true ),
TRAP_ENTRY(exc_int3, false ),
TRAP_ENTRY(exc_overflow, false ),
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 5d252aaeade8..aab1d99b2b48 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -29,10 +29,9 @@ _ASM_NOKPROBE(xen_\name)
.endm
xen_pv_trap asm_exc_divide_error
-xen_pv_trap asm_exc_debug
-xen_pv_trap asm_exc_xendebug
+xen_pv_trap asm_xenpv_exc_debug
xen_pv_trap asm_exc_int3
-xen_pv_trap asm_exc_xennmi
+xen_pv_trap asm_xenpv_exc_nmi
xen_pv_trap asm_exc_overflow
xen_pv_trap asm_exc_bounds
xen_pv_trap asm_exc_invalid_op
@@ -161,10 +160,22 @@ SYM_FUNC_END(xen_syscall32_target)
/* 32-bit compat sysenter target */
SYM_FUNC_START(xen_sysenter_target)
- mov 0*8(%rsp), %rcx
- mov 1*8(%rsp), %r11
- mov 5*8(%rsp), %rsp
- jmp entry_SYSENTER_compat
+ /*
+ * NB: Xen is polite and clears TF from EFLAGS for us. This means
+ * that we don't need to guard against single step exceptions here.
+ */
+ popq %rcx
+ popq %r11
+
+ /*
+ * Neither Xen nor the kernel really knows what the old SS and
+ * CS were. The kernel expects __USER32_DS and __USER32_CS, so
+ * report those values even though Xen will guess its own values.
+ */
+ movq $__USER32_DS, 4*8(%rsp)
+ movq $__USER32_CS, 1*8(%rsp)
+
+ jmp entry_SYSENTER_compat_after_hwframe
SYM_FUNC_END(xen_sysenter_target)
#else /* !CONFIG_IA32_EMULATION */