| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 10:24:40 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 10:24:40 -0700 |
| commit | 92a0610b6acd3bfdc977b612853ba6711447e887 (patch) | |
| tree | 43ef29eb21ba8a33e5de80242ce80b9ed13a27df /arch/x86/include | |
| parent | ca1b66922a702316734bcd5ea2100e5fb8f3caa3 (diff) | |
| parent | e1ebb2b49048c4767cfa0d8466f9c701e549fa5e (diff) | |
Merge tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 cpu updates from Borislav Petkov:
- Add support for hardware-enforced cache coherency on AMD, which
obviates the need to flush cachelines before changing the PTE
encryption bit (Krish Sadhukhan); a sketch of the call-site pattern
this enables follows this list
- Add Centaur initialization support for families >= 7 (Tony W Wang-oc)
- Add a feature flag for the TSX suspend load tracking feature and
expose it to KVM (Cathy Zhang)
- Emulate SLDT and STR so that Windows programs don't crash on UMIP
machines (Brendan Shanks and Ricardo Neri); a small user-space probe
illustrating this follows the patch list below
- Use the new SERIALIZE insn on Intel hardware which supports it
(Ricardo Neri)
- Misc cleanups and fixes
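The cache-coherency item at the top of the list is easiest to picture from its call sites, which live outside arch/x86/include and therefore outside the diffstat below. The following is only a rough sketch of the pattern the coherency patches apply, with a hypothetical helper name; only boot_cpu_has(), clflush_cache_range() and the new X86_FEATURE_SME_COHERENT bit are taken from the kernel.

```c
#include <asm/cpufeature.h>	/* boot_cpu_has(), X86_FEATURE_SME_COHERENT */
#include <asm/cacheflush.h>	/* clflush_cache_range() */

/*
 * Hypothetical helper, for illustration only.  Before changing the SME/SEV
 * encryption bit of a mapping, older AMD parts need the affected cachelines
 * flushed because the caches are not kept coherent across encryption
 * domains.  CPUs that advertise X86_FEATURE_SME_COHERENT enforce that
 * coherency in hardware, so the flush can be skipped.
 */
static void flush_before_changing_enc_bit(void *vaddr, unsigned int size)
{
	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
		return;	/* hardware keeps caches coherent; no flush needed */

	clflush_cache_range(vaddr, size);
}
```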
* tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains
x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domains
x86/cpu: Add hardware-enforced cache coherency as a CPUID feature
x86/cpu/centaur: Add Centaur family >=7 CPUs initialization support
x86/cpu/centaur: Replace two-condition switch-case with an if statement
x86/kvm: Expose TSX Suspend Load Tracking feature
x86/cpufeatures: Enumerate TSX suspend load address tracking instructions
x86/umip: Add emulation/spoofing for SLDT and STR instructions
x86/cpu: Fix typos and improve the comments in sync_core()
x86/cpu: Use XGETBV and XSETBV mnemonics in fpu/internal.h
x86/cpu: Use SERIALIZE in sync_core() when available
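The SLDT/STR emulation can be observed from user space. The program below is a hypothetical probe, not part of this merge: with UMIP enabled, both instructions raise #GP when executed at CPL 3, and with these patches the kernel traps the fault and returns dummy selector values instead of letting the process die with SIGSEGV, which is what the affected Windows programs were hitting.

```c
/* umip_probe.c - hypothetical demo, not part of this merge.
 *
 * SLDT and STR store the LDT and task-register selectors.  On a UMIP-capable
 * CPU running a kernel with this emulation, the values printed below are the
 * kernel's spoofed ones; without the emulation the process would receive
 * SIGSEGV instead.
 *
 * Build: gcc -o umip_probe umip_probe.c   (x86 only)
 */
#include <stdio.h>

int main(void)
{
	unsigned short tr = 0, ldtr = 0;

	asm volatile("str %0"  : "=m" (tr));	/* store task register selector */
	asm volatile("sldt %0" : "=m" (ldtr));	/* store LDT selector */

	printf("STR  -> 0x%04x\n", (unsigned int)tr);
	printf("SLDT -> 0x%04x\n", (unsigned int)ldtr);
	return 0;
}
```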
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 3
-rw-r--r-- | arch/x86/include/asm/fpu/internal.h | 7
-rw-r--r-- | arch/x86/include/asm/special_insns.h | 6
-rw-r--r-- | arch/x86/include/asm/sync_core.h | 34
4 files changed, 32 insertions, 18 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..50b2a8d85ef0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* free					( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -368,6 +368,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f90..21a8b5259477 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
 {
 	u32 eax, edx;
 
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
+	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
 	return eax + ((u64)edx << 32);
 }
 
@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
 	u32 eax = value;
 	u32 edx = value >> 32;
 
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
+	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 59a3e13204c3..5999b0b3dd4a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+static inline void serialize(void)
+{
+	/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+	asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index fdb5b356e59b..0fd4a9dfb29c 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -5,6 +5,7 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
+#include <asm/special_insns.h>
 
 #ifdef CONFIG_X86_32
 static inline void iret_to_self(void)
@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
  *
  * b) Text was modified on a different CPU, may subsequently be
  *    executed on this CPU, and you want to make sure the new version
- *    gets executed.  This generally means you're calling this in a IPI.
+ *    gets executed.  This generally means you're calling this in an IPI.
  *
  * If you're calling this for a different reason, you're probably doing
  * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
  */
 static inline void sync_core(void)
 {
 	/*
-	 * There are quite a few ways to do this.  IRET-to-self is nice
-	 * because it works on every CPU, at any CPL (so it's compatible
-	 * with paravirtualization), and it never exits to a hypervisor.
-	 * The only down sides are that it's a bit slow (it seems to be
-	 * a bit more than 2x slower than the fastest options) and that
-	 * it unmasks NMIs.  The "push %cs" is needed because, in
-	 * paravirtual environments, __KERNEL_CS may not be a valid CS
-	 * value when we do IRET directly.
+	 * The SERIALIZE instruction is the most straightforward way to
+	 * do this, but it is not universally available.
+	 */
+	if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+		serialize();
+		return;
+	}
+
+	/*
+	 * For all other processors, there are quite a few ways to do this.
+	 * IRET-to-self is nice because it works on every CPU, at any CPL
+	 * (so it's compatible with paravirtualization), and it never exits
+	 * to a hypervisor.  The only downsides are that it's a bit slow
+	 * (it seems to be a bit more than 2x slower than the fastest
+	 * options) and that it unmasks NMIs.  The "push %cs" is needed,
+	 * because in paravirtual environments __KERNEL_CS may not be a
+	 * valid CS value when we do IRET directly.
 	 *
 	 * In case NMI unmasking or performance ever becomes a problem,
 	 * the next best option appears to be MOV-to-CR2 and an
@@ -71,9 +84,6 @@ static inline void sync_core(void)
 	 * CPUID is the conventional way, but it's nasty: it doesn't
 	 * exist on some 486-like CPUs, and it usually exits to a
 	 * hypervisor.
-	 *
-	 * Like all of Linux's memory ordering operations, this is a
-	 * compiler barrier as well.
 	 */
 	iret_to_self();
 }
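The new bits named in the cpufeatures.h hunk are plain CPUID enumeration: word 18 is CPUID.(EAX=7,ECX=0):EDX, so (18*32+14) is EDX bit 14 (SERIALIZE) and (18*32+16) is EDX bit 16 (TSX suspend load address tracking). The probe below is a hypothetical user-space helper, not part of the merge; it assumes the GCC/Clang <cpuid.h> wrapper and spells the bit masks out numerically rather than relying on compiler-provided macros, which differ between toolchain versions.

```c
/* cpuid_probe.c - hypothetical helper, not part of this merge.
 * Reports whether the CPU advertises the two features newly named in
 * cpufeatures.h above: SERIALIZE and TSX suspend load address tracking.
 *
 * Build: gcc -o cpuid_probe cpuid_probe.c   (x86 only)
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 7, subleaf 0; the feature bits of interest live in EDX. */
	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 7 not supported\n");
		return 1;
	}

	printf("SERIALIZE: %s\n", (edx & (1u << 14)) ? "yes" : "no");
	printf("TSXLDTRK : %s\n", (edx & (1u << 16)) ? "yes" : "no");
	return 0;
}
```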