77 files changed, 4125 insertions, 1081 deletions
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..4e63da2b1889 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -69,15 +69,21 @@ static inline void __tileloadd(void *tile)
 		     : : "a"(tile), "d"(0));
 }
 
+static inline int tileloadd_safe(void *tile)
+{
+	return kvm_asm_safe(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10",
+			    "a"(tile), "d"(0));
+}
+
 static inline void __tilerelease(void)
 {
 	asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
 }
 
-static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+static inline void __xsavec(struct xstate *xstate, u64 rfbm)
 {
-	uint32_t rfbm_lo = rfbm;
-	uint32_t rfbm_hi = rfbm >> 32;
+	u32 rfbm_lo = rfbm;
+	u32 rfbm_hi = rfbm >> 32;
 
 	asm volatile("xsavec (%%rdi)"
 		     : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
@@ -124,27 +130,52 @@ static void set_tilecfg(struct tile_config *cfg)
 	}
 }
 
+enum {
+	/* Retrieve TMM0 from guest, stash it for TEST_RESTORE_TILEDATA */
+	TEST_SAVE_TILEDATA = 1,
+
+	/* Check TMM0 against tiledata */
+	TEST_COMPARE_TILEDATA = 2,
+
+	/* Restore TMM0 from earlier save */
+	TEST_RESTORE_TILEDATA = 4,
+
+	/* Full VM save/restore */
+	TEST_SAVE_RESTORE = 8,
+};
+
 static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 						    struct tile_data *tiledata,
 						    struct xstate *xstate)
 {
+	int vector;
+
 	GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
 		     this_cpu_has(X86_FEATURE_OSXSAVE));
 	check_xtile_info();
-	GUEST_SYNC(1);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 
 	/* xfd=0, enable amx */
 	wrmsr(MSR_IA32_XFD, 0);
-	GUEST_SYNC(2);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
 	set_tilecfg(amx_cfg);
 	__ldtilecfg(amx_cfg);
-	GUEST_SYNC(3);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 	/* Check save/restore when trap to userspace */
 	__tileloadd(tiledata);
-	GUEST_SYNC(4);
+	GUEST_SYNC(TEST_SAVE_TILEDATA | TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+	/* xfd=0x40000, disable amx tiledata */
+	wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+	/* host tries setting tiledata while guest XFD is set */
+	GUEST_SYNC(TEST_RESTORE_TILEDATA);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
+
+	wrmsr(MSR_IA32_XFD, 0);
 	__tilerelease();
-	GUEST_SYNC(5);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 	/*
 	 * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
 	 * the xcomp_bv.
@@ -154,6 +185,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
 	GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
 
+	/* #NM test */
+
 	/* xfd=0x40000, disable amx tiledata */
 	wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
 
@@ -166,32 +199,33 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 	GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
 	GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
 
-	GUEST_SYNC(6);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
 	set_tilecfg(amx_cfg);
 	__ldtilecfg(amx_cfg);
-	/* Trigger #NM exception */
-	__tileloadd(tiledata);
-	GUEST_SYNC(10);
 
-	GUEST_DONE();
-}
+	/* Trigger #NM exception */
+	vector = tileloadd_safe(tiledata);
+	__GUEST_ASSERT(vector == NM_VECTOR,
+		       "Wanted #NM on tileloadd with XFD[18]=1, got %s",
+		       ex_str(vector));
 
-void guest_nm_handler(struct ex_regs *regs)
-{
-	/* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
-	GUEST_SYNC(7);
 	GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
-	GUEST_SYNC(8);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
 	GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
 	/* Clear xfd_err */
 	wrmsr(MSR_IA32_XFD_ERR, 0);
 	/* xfd=0, enable amx */
 	wrmsr(MSR_IA32_XFD, 0);
-	GUEST_SYNC(9);
+	GUEST_SYNC(TEST_SAVE_RESTORE);
+
+	__tileloadd(tiledata);
+	GUEST_SYNC(TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
@@ -200,10 +234,10 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_x86_state *state;
+	struct kvm_x86_state *tile_state = NULL;
 	int xsave_restore_size;
-	vm_vaddr_t amx_cfg, tiledata, xstate;
+	gva_t amx_cfg, tiledata, xstate;
 	struct ucall uc;
-	u32 amx_offset;
 	int ret;
 
 	/*
@@ -228,22 +262,20 @@ int main(int argc, char *argv[])
 
 	vcpu_regs_get(vcpu, &regs1);
 
-	/* Register #NM handler */
-	vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
 	/* amx cfg for guest_code */
-	amx_cfg = vm_vaddr_alloc_page(vm);
+	amx_cfg = vm_alloc_page(vm);
 	memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
 
 	/* amx tiledata for guest_code */
-	tiledata = vm_vaddr_alloc_pages(vm, 2);
+	tiledata = vm_alloc_pages(vm, 2);
 	memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
 
 	/* XSAVE state for guest_code */
-	xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+	xstate = vm_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
 	memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
 	vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
 
+	int iter = 0;
 	for (;;) {
 		vcpu_run(vcpu);
 		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -253,37 +285,47 @@ int main(int argc, char *argv[])
 			REPORT_GUEST_ASSERT(uc);
 			/* NOT REACHED */
 		case UCALL_SYNC:
-			switch (uc.args[1]) {
-			case 1:
-			case 2:
-			case 3:
-			case 5:
-			case 6:
-			case 7:
-			case 8:
-				fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
-				break;
-			case 4:
-			case 10:
-				fprintf(stderr,
-				"GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+			++iter;
+			if (uc.args[1] & TEST_SAVE_TILEDATA) {
+				fprintf(stderr, "GUEST_SYNC #%d, save tiledata\n", iter);
+				tile_state = vcpu_save_state(vcpu);
+			}
+			if (uc.args[1] & TEST_COMPARE_TILEDATA) {
+				fprintf(stderr, "GUEST_SYNC #%d, check TMM0 contents\n", iter);
 
 				/* Compacted mode, get amx offset by xsave area
 				 * size subtract 8K amx size.
 				 */
-				amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
-				state = vcpu_save_state(vcpu);
-				void *amx_start = (void *)state->xsave + amx_offset;
+				u32 amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+				void *amx_start = (void *)tile_state->xsave + amx_offset;
 				void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
 				/* Only check TMM0 register, 1 tile */
 				ret = memcmp(amx_start, tiles_data, TILE_SIZE);
 				TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+			}
+			if (uc.args[1] & TEST_RESTORE_TILEDATA) {
+				fprintf(stderr, "GUEST_SYNC #%d, before KVM_SET_XSAVE\n", iter);
+				vcpu_xsave_set(vcpu, tile_state->xsave);
+				fprintf(stderr, "GUEST_SYNC #%d, after KVM_SET_XSAVE\n", iter);
+			}
+			if (uc.args[1] & TEST_SAVE_RESTORE) {
+				fprintf(stderr, "GUEST_SYNC #%d, save/restore VM state\n", iter);
+				state = vcpu_save_state(vcpu);
+				memset(&regs1, 0, sizeof(regs1));
+				vcpu_regs_get(vcpu, &regs1);
+
+				kvm_vm_release(vm);
+
+				/* Restore state in a new VM.  */
+				vcpu = vm_recreate_with_one_vcpu(vm);
+				vcpu_load_state(vcpu, state);
 				kvm_x86_state_cleanup(state);
-				break;
-			case 9:
-				fprintf(stderr,
-				"GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
-				break;
+
+				memset(&regs2, 0, sizeof(regs2));
+				vcpu_regs_get(vcpu, &regs2);
+				TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+					    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+					    (ulong) regs2.rdi, (ulong) regs2.rsi);
 			}
 			break;
 		case UCALL_DONE:
@@ -293,22 +335,6 @@ int main(int argc, char *argv[])
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
 
-		state = vcpu_save_state(vcpu);
-		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vcpu, &regs1);
-
-		kvm_vm_release(vm);
-
-		/* Restore state in a new VM.  */
-		vcpu = vm_recreate_with_one_vcpu(vm);
-		vcpu_load_state(vcpu, state);
-		kvm_x86_state_cleanup(state);
-
-		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vcpu, &regs2);
-		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-			    (ulong) regs2.rdi, (ulong) regs2.rsi);
 	}
 done:
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..c91660103137
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+	return open_path_or_exit(path, O_RDONLY);
+}
+
+static u64 read_dev_msr(int msr_fd, u32 msr)
+{
+	u64 data;
+	ssize_t rc;
+
+	rc = pread(msr_fd, &data, sizeof(data), msr);	/* MSR index is passed as the file offset */
+	TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+	return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_ITERATIONS; i++)
+		GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE	64
+
+static void l2_guest_code(void)
+{
+	guest_read_aperf_mperf();
+	GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+	GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+	prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/*
+	 * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+	 * in the VMCS by prepare_vmcs()), as MSR exiting is mandatory on Intel.
+	 */
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+		vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+	/* prepare_vmcs() got NULL for the L2 entry point, so set GUEST_RIP by hand. */
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+	GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+	guest_read_aperf_mperf();
+
+	if (this_cpu_has(X86_FEATURE_SVM))
+		l1_svm_code(nested_test_data);
+	else if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(nested_test_data);
+	else
+		GUEST_DONE();
+	/* Guest context: report failure via ucall, not the host-side TEST_FAIL(). */
+	GUEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+	u64 msr_val;
+	u8 vector;
+
+	vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+	GUEST_ASSERT(vector == GP_VECTOR);
+	/* MPERF must fault as well; reading APERF twice would leave it untested. */
+	vector = rdmsr_safe(MSR_IA32_MPERF, &msr_val);
+	GUEST_ASSERT(vector == GP_VECTOR);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+	u64 host_aperf_before, host_mperf_before;
+	gva_t nested_test_data_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int msr_fd, cpu, i;
+
+	/* Sanity check that APERF/MPERF are unsupported by default. */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+	kvm_vm_free(vm);
+
+	cpu = pin_self_to_any_cpu();
+
+	msr_fd = open_dev_msr(cpu);
+
+	/*
+	 * This test requires a non-standard VM initialization, because
+	 * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+	 * a VCPU has been created.
+	 */
+	vm = vm_create(1);
+
+	TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+		     KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+		      KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+	if (!has_nested)
+		nested_test_data_gva = NONCANONICAL;
+	else if (kvm_cpu_has(X86_FEATURE_SVM))
+		vcpu_alloc_svm(vm, &nested_test_data_gva);
+	else
+		vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+	vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+	host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+	host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+	for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+		u64 host_aperf_after, host_mperf_after;
+		u64 guest_aperf, guest_mperf;
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_DONE:
+			goto done;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+		case UCALL_SYNC:
+			guest_aperf = uc.args[0];
+			guest_mperf = uc.args[1];
+
+			host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+			host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+			TEST_ASSERT(host_aperf_before < guest_aperf,
+				    "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+				    host_aperf_before, guest_aperf);
+			TEST_ASSERT(guest_aperf < host_aperf_after,
+				    "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+				    guest_aperf, host_aperf_after);
+			TEST_ASSERT(host_mperf_before < guest_mperf,
+				    "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+				    host_mperf_before, guest_mperf);
+			TEST_ASSERT(guest_mperf < host_mperf_after,
+				    "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+				    guest_mperf, host_mperf_after);
+
+			host_aperf_before = host_aperf_after;
+			host_mperf_before = host_mperf_after;
+
+			break;
+		}
+	}
+	TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+	kvm_vm_free(vm);
+	close(msr_fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
index f8916bb34405..0c84c27ea584 100644
--- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
@@ -19,8 +19,8 @@
  * timer frequency.
  */
 static const struct {
-	const uint32_t tdcr;
-	const uint32_t divide_count;
+	const u32 tdcr;
+	const u32 divide_count;
 } tdcrs[] = {
 	{0x0, 2},
 	{0x1, 4},
@@ -42,12 +42,12 @@ static void apic_enable(void)
 		xapic_enable();
 }
 
-static uint32_t apic_read_reg(unsigned int reg)
+static u32 apic_read_reg(unsigned int reg)
 {
 	return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
 }
 
-static void apic_write_reg(unsigned int reg, uint32_t val)
+static void apic_write_reg(unsigned int reg, u32 val)
 {
 	if (is_x2apic)
 		x2apic_write_reg(reg, val);
@@ -55,12 +55,12 @@ static void apic_write_reg(unsigned int reg, uint32_t val)
 		xapic_write_reg(reg, val);
 }
 
-static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+static void apic_guest_code(u64 apic_hz, u64 delay_ms)
 {
-	uint64_t tsc_hz = guest_tsc_khz * 1000;
-	const uint32_t tmict = ~0u;
-	uint64_t tsc0, tsc1, freq;
-	uint32_t tmcct;
+	u64 tsc_hz = guest_tsc_khz * 1000;
+	const u32 tmict = ~0u;
+	u64 tsc0, tsc1, freq;
+	u32 tmcct;
 	int i;
 
 	apic_enable();
@@ -121,7 +121,7 @@ static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms,
 				    bool x2apic)
 {
 	struct kvm_vcpu *vcpu;
@@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
 	vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
 		      NSEC_PER_SEC / apic_hz);
 
+	TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1);
+	TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS),
+		       NSEC_PER_SEC / apic_hz);
+
 	vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
 	vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
 
@@ -168,8 +172,8 @@ int main(int argc, char *argv[])
 	 * Arbitrarilty default to 25MHz for the APIC bus frequency, which is
 	 * different enough from the default 1GHz to be interesting.
 	 */
-	uint64_t apic_hz = 25 * 1000 * 1000;
-	uint64_t delay_ms = 100;
+	u64 apic_hz = 25 * 1000 * 1000;
+	u64 delay_ms = 100;
 	int opt;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..ef0ddd240887 100644
--- a/tools/testing/selftests/kvm/x86/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -140,10 +140,10 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
 	}
 }
 
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, gva_t *p_gva, struct kvm_cpuid2 *cpuid)
 {
 	int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
-	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+	gva_t gva = vm_alloc(vm, size, KVM_UTIL_MIN_VADDR);
 	struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
 
 	memcpy(guest_cpuids, cpuid, size);
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
 static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *ent;
+	struct kvm_sregs sregs;
 	int rc;
 	u32 eax, ebx, x;
 
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
 	rc = __vcpu_set_cpuid(vcpu);
 	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
 
+	/*
+	 * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+	 * setting unmodified CPUID succeeds with runtime CPUID updates.
+	 */
+	vcpu_sregs_get(vcpu, &sregs);
+	if (kvm_cpu_has(X86_FEATURE_XSAVE))
+		sregs.cr4 ^= X86_CR4_OSXSAVE;
+	if (kvm_cpu_has(X86_FEATURE_PKU))
+		sregs.cr4 ^= X86_CR4_PKE;
+	vcpu_sregs_set(vcpu, &sregs);
+
+	rc = __vcpu_set_cpuid(vcpu);
+	TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
 	/* Changing CPU features is forbidden */
 	ent = vcpu_get_cpuid_entry(vcpu, 0x7);
 	ebx = ent->ebx;
@@ -202,7 +217,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu)
 int main(void)
 {
 	struct kvm_vcpu *vcpu;
-	vm_vaddr_t cpuid_gva;
+	gva_t cpuid_gva;
 	struct kvm_vm *vm;
 	int stage;
 
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 2d814c1d1dc4..0dfaf03cd0a0 100644
--- a/tools/testing/selftests/kvm/x86/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
@@ -16,7 +16,7 @@
 #define IRQ_VECTOR 0xAA
 
 /* For testing data access debug BP */
-uint32_t guest_value;
+u32 guest_value;
 
 extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
 
@@ -86,7 +86,7 @@ int main(void)
 	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct ucall uc;
-	uint64_t cmd;
+	u64 cmd;
 	int i;
 	/* Instruction lengths starting at ss_start */
 	int ss_size[6] = {
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..388ba4101f97 100644
--- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -23,7 +23,7 @@
 #define SLOTS		2
 #define ITERATIONS	2
 
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
 static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
 
@@ -33,17 +33,17 @@ static int iteration;
 static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
 struct kvm_page_stats {
-	uint64_t pages_4k;
-	uint64_t pages_2m;
-	uint64_t pages_1g;
-	uint64_t hugepages;
+	u64 pages_4k;
+	u64 pages_2m;
+	u64 pages_1g;
+	u64 hugepages;
 };
 
 static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
 {
-	stats->pages_4k = vm_get_stat(vm, "pages_4k");
-	stats->pages_2m = vm_get_stat(vm, "pages_2m");
-	stats->pages_1g = vm_get_stat(vm, "pages_1g");
+	stats->pages_4k = vm_get_stat(vm, pages_4k);
+	stats->pages_2m = vm_get_stat(vm, pages_2m);
+	stats->pages_1g = vm_get_stat(vm, pages_1g);
 	stats->hugepages = stats->pages_2m + stats->pages_1g;
 
 	pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
@@ -89,9 +89,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
 {
 	struct kvm_vm *vm;
 	unsigned long **bitmaps;
-	uint64_t guest_num_pages;
-	uint64_t host_num_pages;
-	uint64_t pages_per_slot;
+	u64 guest_num_pages;
+	u64 host_num_pages;
+	u64 pages_per_slot;
 	int i;
 	struct kvm_page_stats stats_populated;
 	struct kvm_page_stats stats_dirty_logging_enabled;
diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
new file mode 100644
index 000000000000..5b3aef109cfc
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Red Hat, Inc.
+ *
+ * Test that vmx_leave_smm() validates vmcs12 controls before re-entering
+ * nested guest mode on RSM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "smm.h"
+#include "hyperv.h"
+#include "vmx.h"
+
+#define SMRAM_GPA	0x1000000
+#define SMRAM_STAGE	0xfe
+
+#define SYNC_PORT	0xe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+/*
+ * SMI handler: runs in real-address mode.
+ * Reports SMRAM_STAGE via port IO, then does RSM.
+ */
+static u8 smi_handler[] = {
+	0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
+	0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
+	0x0f, 0xaa,           /* rsm */
+};
+
+static inline void sync_with_host(u64 phase)
+{
+	asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n"
+		     : "+a" (phase));
+}
+
+static void l2_guest_code(void)
+{
+	sync_with_host(1);
+
+	/* After SMI+RSM with invalid controls, we should not reach here. */
+	vmcall();
+}
+
+static void guest_code(struct vmx_pages *vmx_pages,
+		       struct hyperv_test_pages *hv_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	/* Set up Hyper-V enlightenments and eVMCS */
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+	evmcs_enable();
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_evmcs(hv_pages));
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_ASSERT(!vmlaunch());
+
+	/* L2 exits via vmcall if test fails */
+	sync_with_host(2);
+}
+
+int main(int argc, char *argv[])
+{
+	gva_t vmx_pages_gva = 0, hv_pages_gva = 0;
+	struct hyperv_test_pages *hv;
+	struct hv_enlightened_vmcs *evmcs;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct kvm_regs regs;
+	int stage_reported;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
+
+	vcpu_set_hv_cpuid(vcpu);
+	vcpu_enable_evmcs(vcpu);
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+	vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
+
+	vcpu_run(vcpu);
+
+	/* L2 is running and syncs with host.  */
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+	vcpu_regs_get(vcpu, &regs);
+	stage_reported = regs.rax & 0xff;
+	TEST_ASSERT(stage_reported == 1,
+		    "Expected stage 1, got %d", stage_reported);
+
+	/* Inject SMI while L2 is running.  */
+	inject_smi(vcpu);
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+	vcpu_regs_get(vcpu, &regs);
+	stage_reported = regs.rax & 0xff;
+	TEST_ASSERT(stage_reported == SMRAM_STAGE,
+		    "Expected SMM handler stage %#x, got %#x",
+		    SMRAM_STAGE, stage_reported);
+
+	/*
+	 * Guest is now paused in the SMI handler, about to execute RSM.
+	 * Hack the eVMCS page to set up invalid pin-based execution
+	 * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING).
+	 */
+	evmcs = hv->enlightened_vmcs_hva;
+	evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS;
+	evmcs->hv_clean_fields = 0;
+
+	/*
+	 * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE,
+	 * copying the invalid pin_based_vm_exec_control into cached_vmcs12.
+	 */
+	union {
+		struct kvm_nested_state state;
+		char state_[16384];
+	} nested_state_buf;
+
+	memset(&nested_state_buf, 0, sizeof(nested_state_buf));
+	nested_state_buf.state.size = sizeof(nested_state_buf);
+	vcpu_nested_state_get(vcpu, &nested_state_buf.state);
+
+	/*
+	 * Resume the guest.  The SMI handler executes RSM, which calls
+	 * vmx_leave_smm().  nested_vmx_check_controls() should detect
+	 * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault.
+	 */
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..c0d30ccd8767
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation.  Use BT on
+ * bit 0 to set RFLAGS.CF based on whether the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define fastop(__insn)									\
+	"bt $0, %[bt_val]\n\t"								\
+	__insn "\n\t"									\
+	"pushfq\n\t"									\
+	"pop %[flags]\n\t"
+
+#define flags_constraint(flags_val) [flags]"=r"(flags_val)
+#define bt_constraint(__bt_val) [bt_val]"rm"((u32)__bt_val)
+
+#define guest_execute_fastop_1(FEP, insn, __val, __flags)				\
+({											\
+	__asm__ __volatile__(fastop(FEP insn " %[val]")					\
+			     : [val]"+r"(__val), flags_constraint(__flags)		\
+			     : bt_constraint(__val)					\
+			     : "cc", "memory");						\
+})
+
+#define guest_test_fastop_1(insn, type_t, __val)					\
+({											\
+	type_t val = __val, ex_val = __val, input = __val;				\
+	u64 flags, ex_flags;								\
+											\
+	guest_execute_fastop_1("", insn, ex_val, ex_flags);				\
+	guest_execute_fastop_1(KVM_FEP, insn, val, flags);				\
+											\
+	__GUEST_ASSERT(val == ex_val,							\
+		       "Wanted 0x%lx for '%s 0x%lx', got 0x%lx",			\
+		       (u64)ex_val, insn, (u64)input, (u64)val);			\
+	__GUEST_ASSERT(flags == ex_flags,						\
+			"Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx",			\
+			ex_flags, insn, (u64)input, flags);				\
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags)			\
+({											\
+	__asm__ __volatile__(fastop(FEP insn " %[input], %[output]")			\
+			     : [output]"+r"(__output), flags_constraint(__flags)	\
+			     : [input]"r"(__input), bt_constraint(__output)		\
+			     : "cc", "memory");						\
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2)				\
+({											\
+	type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2;	\
+	u64 flags, ex_flags;								\
+											\
+	guest_execute_fastop_2("", insn, input, ex_output, ex_flags);			\
+	guest_execute_fastop_2(KVM_FEP, insn, input, output, flags);			\
+											\
+	__GUEST_ASSERT(output == ex_output,						\
+		       "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx",			\
+		       (u64)ex_output, insn, (u64)input,				\
+		       (u64)input2, (u64)output);					\
+	__GUEST_ASSERT(flags == ex_flags,						\
+			"Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx",		\
+			ex_flags, insn, (u64)input, (u64)input2, flags);		\
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags)			\
+({											\
+	__asm__ __volatile__(fastop(FEP insn " %%cl, %[output]")			\
+			     : [output]"+r"(__output), flags_constraint(__flags)	\
+			     : "c"(__shift), bt_constraint(__output)			\
+			     : "cc", "memory");						\
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2)				\
+({											\
+	type_t output = __val2, ex_output = __val2, input = __val2;			\
+	u8 shift = __val1;								\
+	u64 flags, ex_flags;								\
+											\
+	guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags);			\
+	guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags);			\
+											\
+	__GUEST_ASSERT(output == ex_output,						\
+		       "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx",			\
+		       (u64)ex_output, insn, shift, (u64)input,				\
+		       (u64)output);							\
+	__GUEST_ASSERT(flags == ex_flags,						\
+			"Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx",		\
+			ex_flags, insn, shift, (u64)input, flags);			\
+})
+
+#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags)		\
+({											\
+	u64 ign_error_code;								\
+	u8 vector;									\
+											\
+	__asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]"))			\
+			     : "+a"(__a), "+d"(__d), flags_constraint(__flags),		\
+			       KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)		\
+			     : [denom]"rm"(__rm), bt_constraint(__rm)			\
+			     : "cc", "memory", KVM_ASM_SAFE_CLOBBERS);			\
+	vector;										\
+})
+
+#define guest_test_fastop_div(insn, type_t, __val1, __val2)				\
+({											\
+	type_t _a = __val1, _d = __val1, rm = __val2;					\
+	type_t a = _a, d = _d, ex_a = _a, ex_d = _d;					\
+	u64 flags, ex_flags;								\
+	u8 v, ex_v;									\
+											\
+	ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags);	\
+	v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags);		\
+											\
+	/* A single vector check suffices; this one also reports the operands. */	\
+	__GUEST_ASSERT(v == ex_v,							\
+		       "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x",	\
+		       ex_v, insn, (u64)_a, (u64)_d, (u64)rm, v);			\
+	__GUEST_ASSERT(a == ex_a && d == ex_d,						\
+		       "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\
+		       (u64)ex_a, (u64)ex_d, insn, (u64)_a,				\
+		       (u64)_d, (u64)rm, (u64)a, (u64)d);				\
+	__GUEST_ASSERT(v || ex_v || (flags == ex_flags),				\
+			"Wanted flags 0x%lx for '%s  0x%lx:0x%lx/0x%lx', got 0x%lx",	\
+			ex_flags, insn, (u64)_a, (u64)_d, (u64)rm, flags);\
+})
+
+static const u64 vals[] = {
+	0,
+	1,
+	2,
+	4,
+	7,
+	0x5555555555555555,
+	0xaaaaaaaaaaaaaaaa,
+	0xfefefefefefefefe,
+	0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix)						\
+do {											\
+	int i, j;									\
+											\
+	for (i = 0; i < ARRAY_SIZE(vals); i++) {					\
+		guest_test_fastop_1("dec" suffix, type_t, vals[i]);			\
+		guest_test_fastop_1("inc" suffix, type_t, vals[i]);			\
+		guest_test_fastop_1("neg" suffix, type_t, vals[i]);			\
+		guest_test_fastop_1("not" suffix, type_t, vals[i]);			\
+											\
+		for (j = 0; j < ARRAY_SIZE(vals); j++) {				\
+			guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]);	\
+if (sizeof(type_t) != 1) {							\
+			guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]);	\
+}											\
+			guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]);	\
+											\
+			guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]);	\
+			guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]);	\
+											\
+			guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]);	\
+		}									\
+	}										\
+} while (0)
+
+static void guest_code(void)
+{
+	guest_test_fastops(u8, "b");
+	guest_test_fastops(u16, "w");
+	guest_test_fastops(u32, "l");
+	guest_test_fastops(u64, "q");
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(is_forced_emulation_enabled);
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
index a72f13ae2edb..158550701771 100644
--- a/tools/testing/selftests/kvm/x86/feature_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
@@ -12,7 +12,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-static bool is_kvm_controlled_msr(uint32_t msr)
+static bool is_kvm_controlled_msr(u32 msr)
 {
 	return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
 }
@@ -21,7 +21,7 @@ static bool is_kvm_controlled_msr(uint32_t msr)
  * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
  * MSR, and doesn't allow setting the hidden version.
  */
-static bool is_hidden_vmx_msr(uint32_t msr)
+static bool is_hidden_vmx_msr(u32 msr)
 {
 	switch (msr) {
 	case MSR_IA32_VMX_PINBASED_CTLS:
@@ -34,15 +34,15 @@ static bool is_hidden_vmx_msr(uint32_t msr)
 	}
 }
 
-static bool is_quirked_msr(uint32_t msr)
+static bool is_quirked_msr(u32 msr)
 {
 	return msr != MSR_AMD64_DE_CFG;
 }
 
-static void test_feature_msr(uint32_t msr)
+static void test_feature_msr(u32 msr)
 {
-	const uint64_t supported_mask = kvm_get_feature_msr(msr);
-	uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+	const u64 supported_mask = kvm_get_feature_msr(msr);
+	u64 reset_value = is_quirked_msr(msr) ? supported_mask : 0;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
index 762628f7d4ba..753a0e730ea8 100644
--- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
@@ -26,18 +26,18 @@ static void guest_ud_handler(struct ex_regs *regs)
 	regs->rip += HYPERCALL_INSN_SIZE;
 }
 
-static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
-static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+static const u8 vmx_vmcall[HYPERCALL_INSN_SIZE]  = { 0x0f, 0x01, 0xc1 };
+static const u8 svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
 
-extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t do_sched_yield(uint8_t apic_id)
+extern u8 hypercall_insn[HYPERCALL_INSN_SIZE];
+static u64 do_sched_yield(u8 apic_id)
 {
-	uint64_t ret;
+	u64 ret;
 
 	asm volatile("hypercall_insn:\n\t"
 		     ".byte 0xcc,0xcc,0xcc\n\t"
 		     : "=a"(ret)
-		     : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+		     : "a"((u64)KVM_HC_SCHED_YIELD), "b"((u64)apic_id)
 		     : "memory");
 
 	return ret;
@@ -45,14 +45,14 @@ static uint64_t do_sched_yield(uint8_t apic_id)
 
 static void guest_main(void)
 {
-	const uint8_t *native_hypercall_insn;
-	const uint8_t *other_hypercall_insn;
-	uint64_t ret;
+	const u8 *native_hypercall_insn;
+	const u8 *other_hypercall_insn;
+	u64 ret;
 
 	if (host_cpu_is_intel) {
 		native_hypercall_insn = vmx_vmcall;
 		other_hypercall_insn  = svm_vmmcall;
-	} else if (host_cpu_is_amd) {
+	} else if (host_cpu_is_amd_compatible) {
 		native_hypercall_insn = svm_vmmcall;
 		other_hypercall_insn  = vmx_vmcall;
 	} else {
@@ -72,7 +72,7 @@ static void guest_main(void)
 	 * the "right" hypercall.
 	 */
 	if (quirk_disabled) {
-		GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+		GUEST_ASSERT(ret == (u64)-EFAULT);
 		GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
 			     HYPERCALL_INSN_SIZE));
 	} else {
diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
index 37b1a9f52864..fd6b6c67199a 100644
--- a/tools/testing/selftests/kvm/x86/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86/flds_emulation.h
@@ -12,7 +12,7 @@
  * KVM to emulate the instruction (e.g. by providing an MMIO address) to
  * exercise emulation failures.
  */
-static inline void flds(uint64_t address)
+static inline void flds(u64 address)
 {
 	__asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
 }
@@ -21,8 +21,8 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_regs regs;
-	uint8_t *insn_bytes;
-	uint64_t flags;
+	u8 *insn_bytes;
+	u64 flags;
 
 	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
 
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
index 10b1b0ba374e..8e20a03b3329 100644
--- a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
@@ -10,11 +10,11 @@
 
 void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
 {
-	const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
-	const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
-	const uint64_t legal = ignored | valid;
-	uint64_t val = BIT_ULL(bit);
-	uint64_t actual;
+	const u64 ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+	const u64 valid = BIT_ULL(18) | BIT_ULL(24);
+	const u64 legal = ignored | valid;
+	u64 val = BIT_ULL(bit);
+	u64 actual;
 	int r;
 
 	r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
index e058bc676cd6..c083cea546dc 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c
@@ -98,7 +98,7 @@ static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
 	GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
 }
 
-static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, gpa_t tsc_page_gpa)
 {
 	u64 tsc_scale, tsc_offset;
 
@@ -208,7 +208,7 @@ int main(void)
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
-	vm_vaddr_t tsc_page_gva;
+	gva_t tsc_page_gva;
 	int stage;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
@@ -218,7 +218,7 @@ int main(void)
 
 	vcpu_set_hv_cpuid(vcpu);
 
-	tsc_page_gva = vm_vaddr_alloc_page(vm);
+	tsc_page_gva = vm_alloc_page(vm);
 	memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
 	TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
 		"TSC page has to be page aligned");
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..3c21af811d8f 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
 {
 }
 
-static bool smt_possible(void)
-{
-	char buf[16];
-	FILE *f;
-	bool res = true;
-
-	f = fopen("/sys/devices/system/cpu/smt/control", "r");
-	if (f) {
-		if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
-			if (!strncmp(buf, "forceoff", 8) ||
-			    !strncmp(buf, "notsupported", 12))
-				res = false;
-		}
-		fclose(f);
-	}
-
-	return res;
-}
-
 static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
 {
 	const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -64,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
 
 		TEST_ASSERT((entry->function >= 0x40000000) &&
 			    (entry->function <= 0x40000082),
-			    "function %x is our of supported range",
+			    "function %x is out of supported range",
 			    entry->function);
 
 		TEST_ASSERT(entry->index == 0,
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
 		case 0x40000004:
 			test_val = entry->eax & (1UL << 18);
 
-			TEST_ASSERT(!!test_val == !smt_possible(),
+			TEST_ASSERT(!!test_val == !is_smt_possible(),
 				    "NoNonArchitecturalCoreSharing bit"
 				    " doesn't reflect SMT setting");
 
diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
index 74cf19661309..c7fa114aee20 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
@@ -30,7 +30,7 @@ static void guest_nmi_handler(struct ex_regs *regs)
 {
 }
 
-static inline void rdmsr_from_l2(uint32_t msr)
+static inline void rdmsr_from_l2(u32 msr)
 {
 	/* Currently, L1 doesn't preserve GPRs during vmexits. */
 	__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
@@ -76,7 +76,7 @@ void l2_guest_code(void)
 }
 
 void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
-		vm_vaddr_t hv_hcall_page_gpa)
+		gpa_t hv_hcall_page_gpa)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -231,8 +231,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
-	vm_vaddr_t hcall_page;
+	gva_t vmx_pages_gva = 0, hv_pages_gva = 0;
+	gva_t hcall_page;
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -246,7 +246,7 @@ int main(int argc, char *argv[])
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	hcall_page = vm_vaddr_alloc_pages(vm, 1);
+	hcall_page = vm_alloc_pages(vm, 1);
 	memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
 
 	vcpu_set_hv_cpuid(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
index 949e08e98f31..ae047db7b1be 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
@@ -15,19 +15,19 @@
 /* Any value is fine */
 #define EXT_CAPABILITIES 0xbull
 
-static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
-		       vm_vaddr_t out_pg_gva)
+static void guest_code(gpa_t in_pg_gpa, gpa_t out_pg_gpa,
+		       gva_t out_pg_gva)
 {
-	uint64_t *output_gva;
+	u64 *output_gva;
 
 	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
 	wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
 
-	output_gva = (uint64_t *)out_pg_gva;
+	output_gva = (u64 *)out_pg_gva;
 
 	hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
 
-	/* TLFS states output will be a uint64_t value */
+	/* TLFS states output will be a u64 value */
 	GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
 
 	GUEST_DONE();
@@ -35,12 +35,12 @@ static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
 
 int main(void)
 {
-	vm_vaddr_t hcall_out_page;
-	vm_vaddr_t hcall_in_page;
+	gva_t hcall_out_page;
+	gva_t hcall_in_page;
 	struct kvm_vcpu *vcpu;
 	struct kvm_run *run;
 	struct kvm_vm *vm;
-	uint64_t *outval;
+	u64 *outval;
 	struct ucall uc;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
@@ -57,11 +57,11 @@ int main(void)
 	vcpu_set_hv_cpuid(vcpu);
 
 	/* Hypercall input */
-	hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+	hcall_in_page = vm_alloc_pages(vm, 1);
 	memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
 
 	/* Hypercall output */
-	hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+	hcall_out_page = vm_alloc_pages(vm, 1);
 	memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
 
 	vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..7347f1fe5157 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -22,27 +22,27 @@
 	KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
 
 struct msr_data {
-	uint32_t idx;
+	u32 idx;
 	bool fault_expected;
 	bool write;
 	u64 write_val;
 };
 
 struct hcall_data {
-	uint64_t control;
-	uint64_t expect;
+	u64 control;
+	u64 expect;
 	bool ud_expected;
 };
 
-static bool is_write_only_msr(uint32_t msr)
+static bool is_write_only_msr(u32 msr)
 {
 	return msr == HV_X64_MSR_EOI;
 }
 
 static void guest_msr(struct msr_data *msr)
 {
-	uint8_t vector = 0;
-	uint64_t msr_val = 0;
+	u8 vector = 0;
+	u64 msr_val = 0;
 
 	GUEST_ASSERT(msr->idx);
 
@@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr)
 
 	if (msr->fault_expected)
 		__GUEST_ASSERT(vector == GP_VECTOR,
-			       "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
-			       msr->write ? "WR" : "RD", msr->idx, vector);
+			       "Expected #GP on %sMSR(0x%x), got %s",
+			       msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
 	else
 		__GUEST_ASSERT(!vector,
-			       "Expected success on %sMSR(0x%x), got vector '0x%x'",
-			       msr->write ? "WR" : "RD", msr->idx, vector);
+			       "Expected success on %sMSR(0x%x), got %s",
+			       msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
 
 	if (vector || is_write_only_msr(msr->idx))
 		goto done;
@@ -82,10 +82,10 @@ done:
 	GUEST_DONE();
 }
 
-static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+static void guest_hcall(gpa_t pgs_gpa, struct hcall_data *hcall)
 {
 	u64 res, input, output;
-	uint8_t vector;
+	u8 vector;
 
 	GUEST_ASSERT_NE(hcall->control, 0);
 
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 
 	if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
 		input = pgs_gpa;
-		output = pgs_gpa + 4096;
+		output = pgs_gpa + PAGE_SIZE;
 	} else {
 		input = output = 0;
 	}
@@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 	vector = __hyperv_hypercall(hcall->control, input, output, &res);
 	if (hcall->ud_expected) {
 		__GUEST_ASSERT(vector == UD_VECTOR,
-			       "Expected #UD for control '%lu', got vector '0x%x'",
-			       hcall->control, vector);
+			       "Expected #UD for control '%lu', got %s",
+			       hcall->control, ex_str(vector));
 	} else {
 		__GUEST_ASSERT(!vector,
-			       "Expected no exception for control '%lu', got vector '0x%x'",
-			       hcall->control, vector);
+			       "Expected no exception for control '%lu', got %s",
+			       hcall->control, ex_str(vector));
 		GUEST_ASSERT_EQ(res, hcall->expect);
 	}
 
@@ -134,14 +134,14 @@ static void guest_test_msrs_access(void)
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0;
-	vm_vaddr_t msr_gva;
+	gva_t msr_gva;
 	struct msr_data *msr;
 	bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
 
 	while (true) {
 		vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
 
-		msr_gva = vm_vaddr_alloc_page(vm);
+		msr_gva = vm_alloc_page(vm);
 		memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
 		msr = addr_gva2hva(vm, msr_gva);
 
@@ -523,17 +523,17 @@ static void guest_test_hcalls_access(void)
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0;
-	vm_vaddr_t hcall_page, hcall_params;
+	gva_t hcall_page, hcall_params;
 	struct hcall_data *hcall;
 
 	while (true) {
 		vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
 
 		/* Hypercall input/output */
-		hcall_page = vm_vaddr_alloc_pages(vm, 2);
+		hcall_page = vm_alloc_pages(vm, 2);
 		memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
 
-		hcall_params = vm_vaddr_alloc_page(vm);
+		hcall_params = vm_alloc_page(vm);
 		memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
 		hcall = addr_gva2hva(vm, hcall_params);
 
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..771535f9aad3 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -18,7 +18,7 @@
 
 #define IPI_VECTOR	 0xfe
 
-static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+static volatile u64 ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
 
 struct hv_vpset {
 	u64 format;
@@ -45,13 +45,13 @@ struct hv_send_ipi_ex {
 	struct hv_vpset vp_set;
 };
 
-static inline void hv_init(vm_vaddr_t pgs_gpa)
+static inline void hv_init(gpa_t pgs_gpa)
 {
 	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
 	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
 }
 
-static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+static void receiver_code(void *hcall_page, gpa_t pgs_gpa)
 {
 	u32 vcpu_id;
 
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	/* Signal sender vCPU we're ready */
 	ipis_rcvd[vcpu_id] = (u64)-1;
 
-	for (;;)
-		asm volatile("sti; hlt; cli");
+	for (;;) {
+		safe_halt();
+		cli();
+	}
 }
 
 static void guest_ipi_handler(struct ex_regs *regs)
@@ -83,7 +85,7 @@ static inline void nop_loop(void)
 		asm volatile("nop");
 }
 
-static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+static void sender_guest_code(void *hcall_page, gpa_t pgs_gpa)
 {
 	struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
 	struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
@@ -100,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
 	ipi->vector = IPI_VECTOR;
 	ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
-	hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+	hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
 	nop_loop();
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -114,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	GUEST_SYNC(stage++);
 
 	/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
-	memset(hcall_page, 0, 4096);
+	memset(hcall_page, 0, PAGE_SIZE);
 	ipi_ex->vector = IPI_VECTOR;
 	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 	ipi_ex->vp_set.valid_bank_mask = 1 << 0;
 	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
 	hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-			 pgs_gpa, pgs_gpa + 4096);
+			 pgs_gpa, pgs_gpa + PAGE_SIZE);
 	nop_loop();
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -136,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	GUEST_SYNC(stage++);
 
 	/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
-	memset(hcall_page, 0, 4096);
+	memset(hcall_page, 0, PAGE_SIZE);
 	ipi_ex->vector = IPI_VECTOR;
 	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 	ipi_ex->vp_set.valid_bank_mask = 1 << 1;
 	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
 	hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
-			 pgs_gpa, pgs_gpa + 4096);
+			 pgs_gpa, pgs_gpa + PAGE_SIZE);
 	nop_loop();
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -158,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	GUEST_SYNC(stage++);
 
 	/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
-	memset(hcall_page, 0, 4096);
+	memset(hcall_page, 0, PAGE_SIZE);
 	ipi_ex->vector = IPI_VECTOR;
 	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 	ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
 	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
 	ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
 	hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
-			 pgs_gpa, pgs_gpa + 4096);
+			 pgs_gpa, pgs_gpa + PAGE_SIZE);
 	nop_loop();
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -181,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	GUEST_SYNC(stage++);
 
 	/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
-	memset(hcall_page, 0, 4096);
+	memset(hcall_page, 0, PAGE_SIZE);
 	ipi_ex->vector = IPI_VECTOR;
 	ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
-	hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
 	nop_loop();
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
 	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -241,7 +243,7 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu[3];
-	vm_vaddr_t hcall_page;
+	gva_t hcall_page;
 	pthread_t threads[2];
 	int stage = 1, r;
 	struct ucall uc;
@@ -251,7 +253,7 @@ int main(int argc, char *argv[])
 	vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
 
 	/* Hypercall input/output */
-	hcall_page = vm_vaddr_alloc_pages(vm, 2);
+	hcall_page = vm_alloc_pages(vm, 2);
 	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
 
 
diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
index 0ddb63229bcb..7a62f6a9d606 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
@@ -21,7 +21,7 @@
 #define L2_GUEST_STACK_SIZE 256
 
 /* Exit to L1 from L2 with RDMSR instruction */
-static inline void rdmsr_from_l2(uint32_t msr)
+static inline void rdmsr_from_l2(u32 msr)
 {
 	/* Currently, L1 doesn't preserve GPRs during vmexits. */
 	__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
@@ -67,7 +67,7 @@ void l2_guest_code(void)
 
 static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
 						    struct hyperv_test_pages *hv_pages,
-						    vm_vaddr_t pgs_gpa)
+						    gpa_t pgs_gpa)
 {
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 	struct vmcb *vmcb = svm->vmcb;
@@ -149,8 +149,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
-	vm_vaddr_t hcall_page;
+	gva_t nested_gva = 0, hv_pages_gva = 0;
+	gva_t hcall_page;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
@@ -165,7 +165,7 @@ int main(int argc, char *argv[])
 	vcpu_alloc_svm(vm, &nested_gva);
 	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
 
-	hcall_page = vm_vaddr_alloc_pages(vm, 1);
+	hcall_page = vm_alloc_pages(vm, 1);
 	memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
 
 	vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..15ee8b7bfc11 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -61,14 +61,14 @@ struct hv_tlb_flush_ex {
  * - GVAs of the test pages' PTEs
  */
 struct test_data {
-	vm_vaddr_t hcall_gva;
-	vm_paddr_t hcall_gpa;
-	vm_vaddr_t test_pages;
-	vm_vaddr_t test_pages_pte[NTEST_PAGES];
+	gva_t hcall_gva;
+	gpa_t hcall_gpa;
+	gva_t test_pages;
+	gva_t test_pages_pte[NTEST_PAGES];
 };
 
 /* 'Worker' vCPU code checking the contents of the test page */
-static void worker_guest_code(vm_vaddr_t test_data)
+static void worker_guest_code(gva_t test_data)
 {
 	struct test_data *data = (struct test_data *)test_data;
 	u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
@@ -133,12 +133,12 @@ static void set_expected_val(void *addr, u64 val, int vcpu_id)
  * Update PTEs swapping two test pages.
  * TODO: use swap()/xchg() when these are provided.
  */
-static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+static void swap_two_test_pages(gva_t pte_gva1, gva_t pte_gva2)
 {
-	uint64_t tmp = *(uint64_t *)pte_gva1;
+	u64 tmp = *(u64 *)pte_gva1;
 
-	*(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
-	*(uint64_t *)pte_gva2 = tmp;
+	*(u64 *)pte_gva1 = *(u64 *)pte_gva2;
+	*(u64 *)pte_gva2 = tmp;
 }
 
 /*
@@ -196,12 +196,12 @@ static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
 #define TESTVAL2 0x0202020202020202
 
 /* Main vCPU doing the test */
-static void sender_guest_code(vm_vaddr_t test_data)
+static void sender_guest_code(gva_t test_data)
 {
 	struct test_data *data = (struct test_data *)test_data;
 	struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
 	struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
-	vm_paddr_t hcall_gpa = data->hcall_gpa;
+	gpa_t hcall_gpa = data->hcall_gpa;
 	int i, stage = 1;
 
 	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
@@ -581,9 +581,9 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu[3];
 	pthread_t threads[2];
-	vm_vaddr_t test_data_page, gva;
-	vm_paddr_t gpa;
-	uint64_t *pte;
+	gva_t test_data_page, gva;
+	gpa_t gpa;
+	u64 *pte;
 	struct test_data *data;
 	struct ucall uc;
 	int stage = 1, r, i;
@@ -593,11 +593,11 @@ int main(int argc, char *argv[])
 	vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
 
 	/* Test data page */
-	test_data_page = vm_vaddr_alloc_page(vm);
+	test_data_page = vm_alloc_page(vm);
 	data = (struct test_data *)addr_gva2hva(vm, test_data_page);
 
 	/* Hypercall input/output */
-	data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+	data->hcall_gva = vm_alloc_pages(vm, 2);
 	data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
 	memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
 
@@ -606,7 +606,7 @@ int main(int argc, char *argv[])
 	 * and the test will swap their mappings. The third page keeps the indication
 	 * about the current state of mappings.
 	 */
-	data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+	data->test_pages = vm_alloc_pages(vm, NTEST_PAGES + 1);
 	for (i = 0; i < NTEST_PAGES; i++)
 		memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
 		       (u8)(i + 1), PAGE_SIZE);
@@ -617,11 +617,11 @@ int main(int argc, char *argv[])
 	 * Get PTE pointers for test pages and map them inside the guest.
 	 * Use separate page for each PTE for simplicity.
 	 */
-	gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+	gva = vm_unused_gva_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
 	for (i = 0; i < NTEST_PAGES; i++) {
-		pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+		pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE);
 		gpa = addr_hva2gpa(vm, pte);
-		__virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+		virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
 		data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..52014a3210c8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100	/* atomic increments issued by each guest level */
+#define CACHE_LINE_SIZE		64	/* bytes; sizes and aligns buffer[] below */
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+/* Atomically increment the line-splitting counter NR_BUS_LOCKS_PER_LEVEL times. */
+static void guest_generate_buslocks(void)
+{
+	for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+		atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE	64	/* number of unsigned long elements, not bytes */
+/* L2 entry point: generate bus locks from the nested guest, then signal done. */
+static void l2_guest_code(void)
+{
+	guest_generate_buslocks();
+	GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	/* L1 on SVM: point L2 at l2_guest_code() (stack arg: end of l2_guest_stack[]), then run it. */
+	generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+	GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+	/* NULL is passed as the second argument; GUEST_RIP is written explicitly below. */
+	prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	/* Enter L2 at l2_guest_code(); a non-zero vmwrite()/vmlaunch() result fails the assert. */
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+	GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+	guest_generate_buslocks();
+	/* With SVM or VMX, also generate bus locks from a nested (L2) guest. */
+	if (this_cpu_has(X86_FEATURE_SVM))
+		l1_svm_code(test_data);
+	else if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(test_data);
+	else
+		GUEST_DONE();
+	/* Still running means no GUEST_DONE() ended the test; report it via a guest ucall. */
+	GUEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+	const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+	gva_t nested_test_data_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	int i, bus_locks = 0;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+	/* Enable bus lock exits on the VM, then add the vCPU that runs guest_code(). */
+	vm = vm_create(1);
+	vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+	if (kvm_cpu_has(X86_FEATURE_SVM))
+		vcpu_alloc_svm(vm, &nested_test_data_gva);
+	else
+		vcpu_alloc_vmx(vm, &nested_test_data_gva);
+	/* guest_code() receives the nested test data GVA as its only argument. */
+	vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+	run = vcpu->run;
+	/* At most NR_BUS_LOCKS_PER_LEVEL exits per guest level, plus one run for the ucall. */
+	for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			switch (get_ucall(vcpu, &uc)) {
+			case UCALL_ABORT:
+				REPORT_GUEST_ASSERT(uc);
+				goto done;
+			case UCALL_SYNC:
+				continue;
+			case UCALL_DONE:
+				goto done;
+			default:
+				TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+			}
+		}
+		/* Any exit other than KVM_EXIT_IO must be a bus lock exit. */
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+		/*
+		 * Verify the counter is actually getting incremented, e.g. that
+		 * KVM isn't skipping the instruction.  On Intel, the exit is
+		 * trap-like, i.e. the counter should already have been
+		 * incremented.  On AMD, it's fault-like, i.e. the counter will
+		 * be incremented when the guest re-executes the instruction.
+		 */
+		sync_global_from_guest(vm, *val);
+		TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+		bus_locks++;
+	}
+	TEST_FAIL("Didn't receive UCALL_DONE, took %d bus lock exits\n", bus_locks);
+done:
+	TEST_ASSERT_EQ(i, bus_locks);
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
index 5bc12222d87a..5ad4aeb8e373 100644
--- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
@@ -17,8 +17,8 @@
 #include "processor.h"
 
 struct test_case {
-	uint64_t kvmclock_base;
-	int64_t realtime_offset;
+	u64 kvmclock_base;
+	s64 realtime_offset;
 };
 
 static struct test_case test_cases[] = {
@@ -31,7 +31,7 @@ static struct test_case test_cases[] = {
 #define GUEST_SYNC_CLOCK(__stage, __val)			\
 		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
 
-static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+static void guest_main(gpa_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
 {
 	int i;
 
@@ -52,7 +52,7 @@ static inline void assert_flags(struct kvm_clock_data *data)
 static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
 			struct kvm_clock_data *end)
 {
-	uint64_t obs, exp_lo, exp_hi;
+	u64 obs, exp_lo, exp_hi;
 
 	obs = uc->args[2];
 	exp_lo = start->clock;
@@ -135,8 +135,8 @@ static void enter_guest(struct kvm_vcpu *vcpu)
 int main(void)
 {
 	struct kvm_vcpu *vcpu;
-	vm_vaddr_t pvti_gva;
-	vm_paddr_t pvti_gpa;
+	gva_t pvti_gva;
+	gpa_t pvti_gpa;
 	struct kvm_vm *vm;
 	int flags;
 
@@ -147,7 +147,7 @@ int main(void)
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
 
-	pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+	pvti_gva = vm_alloc(vm, getpagesize(), 0x10000);
 	pvti_gpa = addr_gva2gpa(vm, pvti_gva);
 	vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
 
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 1b805cbdb47b..8ed5fa635021 100644
--- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -13,7 +13,7 @@
 #include "processor.h"
 
 struct msr_data {
-	uint32_t idx;
+	u32 idx;
 	const char *name;
 };
 
@@ -40,8 +40,8 @@ static struct msr_data msrs_to_test[] = {
 
 static void test_msr(struct msr_data *msr)
 {
-	uint64_t ignored;
-	uint8_t vector;
+	u64 ignored;
+	u8 vector;
 
 	PR_MSR(msr);
 
@@ -53,7 +53,7 @@ static void test_msr(struct msr_data *msr)
 }
 
 struct hcall_data {
-	uint64_t nr;
+	u64 nr;
 	const char *name;
 };
 
@@ -73,7 +73,7 @@ static struct hcall_data hcalls_to_test[] = {
 
 static void test_hcall(struct hcall_data *hc)
 {
-	uint64_t r;
+	u64 r;
 
 	PR_HCALL(hc);
 	r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..9c156cf7db0e 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
 
 #include "kvm_util.h"
 #include "processor.h"
+#include "kselftest.h"
 
 #define CPUID_MWAIT (1u << 3)
 
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
 	MWAIT_QUIRK_DISABLED = BIT(0),
 	MISC_ENABLES_QUIRK_DISABLED = BIT(1),
 	MWAIT_DISABLED = BIT(2),
+	CPUID_DISABLED = BIT(3),
+	TEST_MAX = CPUID_DISABLED * 2 - 1,
 };
 
 /*
@@ -27,19 +30,27 @@ do {									\
 									\
 	if (fault_wanted)						\
 		__GUEST_ASSERT((vector) == UD_VECTOR,			\
-			       "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
-			       testcase, vector);			\
+			       "Expected #UD on " insn " for testcase '0x%x', got %s", \
+			       testcase, ex_str(vector));		\
 	else								\
 		__GUEST_ASSERT(!(vector),				\
-			       "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
-			       testcase, vector);			\
+			       "Expected success on " insn " for testcase '0x%x', got %s", \
+			       testcase, ex_str(vector));		\
 } while (0)
 
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
 {
+	int testcase = (int) (long) arg;
 	u8 vector;
 
-	GUEST_SYNC(testcase);
+	u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+	if (!(testcase & MWAIT_DISABLED))
+		val |= MSR_IA32_MISC_ENABLE_MWAIT;
+	wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+	__GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+		       "Expected CPUID.MWAIT %s\n",
+		       (testcase & MWAIT_DISABLED) ? "cleared" : "set");
 
 	/*
 	 * Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,80 +61,76 @@ static void guest_monitor_wait(int testcase)
 
 	vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
 	GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
-	guest_monitor_wait(MWAIT_DISABLED);
-
-	guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
-	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
-	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
-	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-	guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
 
 	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
 {
-	uint64_t disabled_quirks;
+	u64 disabled_quirks;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int testcase;
+	char test[80];
 
 	TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
 
-	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+	ksft_print_header();
+	ksft_set_plan(12);
+	for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+		/* If KVM won't update CPUID, it must match MISC_ENABLE. */
+		if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+		    (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+			continue;
+
+		vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+		vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+		disabled_quirks = 0;
+		if (testcase & MWAIT_QUIRK_DISABLED) {
+			disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+			strcpy(test, "MWAIT can fault");
+		} else {
+			strcpy(test, "MWAIT never faults");
+		}
+		if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+			disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+			strcat(test, ", MISC_ENABLE updates CPUID");
+		} else {
+			strcat(test, ", no CPUID updates");
+		}
+
+		vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+		if (testcase & CPUID_DISABLED) {
+			strcat(test, ", CPUID clear");
+			vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+		} else {
+			strcat(test, ", CPUID set");
+			vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+		}
+
+		if (testcase & MWAIT_DISABLED)
+			strcat(test, ", MWAIT disabled");
 
-	while (1) {
 		vcpu_run(vcpu);
 		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
-		case UCALL_SYNC:
-			testcase = uc.args[1];
-			break;
 		case UCALL_ABORT:
-			REPORT_GUEST_ASSERT(uc);
-			goto done;
+			/* Detected in vcpu_run */
+			break;
 		case UCALL_DONE:
-			goto done;
+			ksft_test_result_pass("%s\n", test);
+			break;
 		default:
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
-			goto done;
-		}
-
-		disabled_quirks = 0;
-		if (testcase & MWAIT_QUIRK_DISABLED)
-			disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
-		if (testcase & MISC_ENABLES_QUIRK_DISABLED)
-			disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
-		vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
-		/*
-		 * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
-		 * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
-		 * bit in MISC_ENABLES accordingly.  If the quirk is enabled,
-		 * the only valid configuration is MWAIT disabled, as CPUID
-		 * can't be manually changed after running the vCPU.
-		 */
-		if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
-			TEST_ASSERT(testcase & MWAIT_DISABLED,
-				    "Can't toggle CPUID features after running vCPU");
-			continue;
+			break;
 		}
-
-		vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
-			     (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+		kvm_vm_free(vm);
 	}
+	ksft_finished();
 
-done:
-	kvm_vm_free(vm);
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..f7e39bf887ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+	const struct kvm_x86_cpu_feature feature;
+	const struct kvm_x86_cpu_feature feature2;
+	const char *name;
+	const u64 reset_val;
+	const u64 write_val;
+	const u64 rsvd_val;
+	const u32 index;
+	const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm)	\
+{									\
+	.index = msr,							\
+	.name = str,							\
+	.write_val = val,						\
+	.rsvd_val = rsvd,						\
+	.reset_val = reset,						\
+	.feature = X86_FEATURE_ ##feat,					\
+	.feature2 = X86_FEATURE_ ##f2,					\
+	.is_kvm_defined = is_kvm,					\
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat)			\
+	____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat)			\
+	__MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat)					\
+	__MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2)				\
+	____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set.  This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat)					\
+	__MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat)				\
+	____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features.  For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+	/*
+	 * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support.  KVM
+	 * is supposed to emulate that behavior based on guest vendor model
+	 * (which is the same as the host vendor model for this test).
+	 */
+	if (!host_cpu_is_amd_compatible)
+		return want;
+
+	switch (msr) {
+	case MSR_IA32_SYSENTER_ESP:
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_TSC_AUX:
+		return want & GENMASK_ULL(31, 0);
+	default:
+		return want;
+	}
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+	u64 val;
+	u8 vec;
+
+	vec = rdmsr_safe(msr, &val);
+	__GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+	__GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+		       want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+	u8 vec;
+
+	vec = wrmsr_safe(msr, val);
+	__GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+		       ex_str(vec), msr, val);
+	__rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+	__rdmsr(msr->index, msr->reset_val);
+	__wrmsr(msr->index, msr->write_val);
+	GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+	__rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+	u64 val;
+	u8 vec;
+
+	/*
+	 * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+	 * repair, just skip the unsupported MSR tests.
+	 */
+	if (ignore_unsupported_msrs)
+		goto skip_wrmsr_gp;
+
+	/*
+	 * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+	 * writable only if their associated feature is supported.  Skip the
+	 * RDMSR #GP test if the secondary feature is supported, but perform
+	 * the WRMSR #GP test as the to-be-written value is tied to the primary
+	 * feature.  For all other MSRs, simply do nothing.
+	 */
+	if (this_cpu_has(msr->feature2)) {
+		if  (msr->index != MSR_IA32_U_CET &&
+		     msr->index != MSR_IA32_S_CET)
+			goto skip_wrmsr_gp;
+
+		goto skip_rdmsr_gp;
+	}
+
+	vec = rdmsr_safe(msr->index, &val);
+	__GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+		       msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+	vec = wrmsr_safe(msr->index, msr->write_val);
+	__GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+		       msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+	GUEST_SYNC(0);
+}
+
+static void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+	/* Skip reserved value checks as well, ignore_msrs is truly a mess. */
+	if (ignore_unsupported_msrs)
+		return;
+
+	/*
+	 * Expect #GP, unless the MSR is supported and the CPU truncates the
+	 * written value (e.g. SYSENTER on AMD), in which case expect success.
+	 */
+	if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) ||
+	    msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+		u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+		__GUEST_ASSERT(vec == GP_VECTOR,
+			       "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+			       msr->index, msr->rsvd_val, ex_str(vec));
+	} else {
+		__wrmsr(msr->index, msr->rsvd_val);
+		__wrmsr(msr->index, msr->reset_val);
+	}
+}
+
+static void guest_main(void)
+{
+	for (;;) {
+		const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+		if (this_cpu_has(msr->feature))
+			guest_test_supported_msr(msr);
+		else
+			guest_test_unsupported_msr(msr);
+
+		if (msr->rsvd_val)
+			guest_test_reserved_val(msr);
+
+		GUEST_SYNC(msr->reset_val);
+	}
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS	1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+	struct {
+		struct kvm_reg_list list;
+		u64 regs[KVM_X86_MAX_NR_REGS];
+	} regs = {};
+	int r, i;
+
+	/*
+	 * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+	 * regs, then the vCPU obviously doesn't support the reg.
+	 */
+	r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+	if (!r)
+		return false;
+
+	TEST_ASSERT_EQ(errno, E2BIG);
+
+	/*
+	 * KVM x86 is expected to support enumerating a relatively small number
+	 * of regs.  The majority of registers supported by KVM_{G,S}ET_ONE_REG
+	 * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST.  For
+	 * simplicity, hardcode the maximum number of regs and manually update
+	 * the test as necessary.
+	 */
+	TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+		    "KVM reports %llu regs, test expects at most %u regs, stale test?",
+		    regs.list.n, KVM_X86_MAX_NR_REGS);
+
+	vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+	for (i = 0; i < regs.list.n; i++) {
+		if (regs.regs[i] == reg)
+			return true;
+	}
+
+	return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+	bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+	u64 reset_val = msrs[idx].reset_val;
+	u64 write_val = msrs[idx].write_val;
+	u64 rsvd_val = msrs[idx].rsvd_val;
+	u32 reg = msrs[idx].index;
+	u64 val;
+	int r;
+
+	if (!use_one_reg)
+		return;
+
+	TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+	if (!has_reg) {
+		r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+		TEST_ASSERT(r && errno == EINVAL,
+			    "Expected failure on get_reg(0x%x)", reg);
+		rsvd_val = 0;
+		goto out;
+	}
+
+	val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+	TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+		    reset_val, reg, val);
+
+	vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+	val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+	TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+		    write_val, reg, val);
+
+out:
+	r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+	TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+	u64 reset_val = msrs[idx].reset_val;
+	u32 msr = msrs[idx].index;
+	u64 val;
+
+	if (!kvm_cpu_has(msrs[idx].feature))
+		return;
+
+	val = vcpu_get_msr(vcpu, msr);
+	TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+		    guest_val, msr, val);
+
+	if (use_one_reg)
+		vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+	else
+		vcpu_set_msr(vcpu, msr, reset_val);
+
+	val = vcpu_get_msr(vcpu, msr);
+	TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+		    reset_val, msr, val);
+
+	if (!has_one_reg)
+		return;
+
+	val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+	TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+		    reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	for (;;) {
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			host_test_msr(vcpu, uc.args[1]);
+			return;
+		case UCALL_PRINTF:
+			pr_info("%s", uc.buffer);
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+		case UCALL_DONE:
+			TEST_FAIL("Unexpected UCALL_DONE");
+		default:
+			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+		}
+	}
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+	int i;
+
+	for (i = 0; i < NR_VCPUS; i++)
+		do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+	const struct kvm_msr __msrs[] = {
+		MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+				  MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+				  MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+		MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+		/*
+		 * TSC_AUX is supported if RDTSCP *or* RDPID is supported.  Add
+		 * entries for each feature so that TSC_AUX doesn't exist for
+		 * the "unsupported" vCPU, and obviously to test both cases.
+		 */
+		MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+		MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+		MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+		/*
+		 * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+		 * but KVM doesn't emulate that behavior on emulated writes,
+		 * i.e. this test will observe different behavior if the MSR
+		 * writes are handled by hardware vs. KVM.  KVM's behavior is
+		 * intended (though far from ideal), so don't bother testing
+		 * non-canonical values.
+		 */
+		MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+		MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+		MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+		MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+		MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+		MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+		MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+		MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+		MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+		MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+		MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+		MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+		MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+		MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+		MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+		MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+		MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+		MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+		MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+		MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+		MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+	};
+
+	const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+	const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+	/*
+	 * Create three vCPUs, but run them on the same task, to validate KVM's
+	 * context switching of MSR state.  Don't pin the task to a pCPU to
+	 * also validate KVM's handling of cross-pCPU migration.  Use the full
+	 * set of features for the first two vCPUs, but clear all features in
+	 * the third vCPU in order to test both positive and negative paths.
+	 */
+	const int NR_VCPUS = 3;
+	struct kvm_vcpu *vcpus[NR_VCPUS];
+	struct kvm_vm *vm;
+	int i;
+
+	kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+	kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+	memcpy(msrs, __msrs, sizeof(__msrs));
+
+	ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+	vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+	sync_global_to_guest(vm, msrs);
+	sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+	/*
+	 * Clear features in the "unsupported features" vCPU.  This needs to be
+	 * done before the first vCPU run as KVM's ABI is that guest CPUID is
+	 * immutable once the vCPU has been run.
+	 */
+	for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+		/*
+		 * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+		 * honor LM=0 for MSRs that are supposed to exist if and only
+		 * if the vCPU is a 64-bit model.  Ditto for NONE; clearing a
+		 * fake feature flag will result in false failures.
+		 */
+		if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+		    memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+			vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+	}
+
+	for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+		struct kvm_msr *msr = &msrs[idx];
+
+		if (msr->is_kvm_defined) {
+			for (i = 0; i < NR_VCPUS; i++)
+				host_test_kvm_reg(vcpus[i]);
+			continue;
+		}
+
+		/*
+		 * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+		 * are consistent with respect to MSRs whose existence is
+		 * enumerated via CPUID.  Skip the check for FS/GS.base MSRs,
+		 * as they aren't reported in the save/restore list since their
+		 * state is managed via SREGS.
+		 */
+		TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+			    kvm_msr_is_in_save_restore_list(msr->index) ==
+			    (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+			    "%s %s in save/restore list, but %s according to CPUID", msr->name,
+			    kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+			    (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+			    "supported" : "unsupported");
+
+		sync_global_to_guest(vm, idx);
+
+		vcpus_run(vcpus, NR_VCPUS);
+		vcpus_run(vcpus, NR_VCPUS);
+	}
+
+	kvm_vm_free(vm);
+}
+
+int main(void)
+{
+	has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+	test_msrs();
+
+	if (has_one_reg) {
+		use_one_reg = true;
+		test_msrs();
+	}
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..761fec293408 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * vmx_close_while_nested
- *
  * Copyright (C) 2019, Red Hat, Inc.
  *
  * Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "vmx.h"
+#include "svm_util.h"
 
 #include <string.h>
 #include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
 	PORT_L0_EXIT = 0x2000,
 };
 
+#define L2_GUEST_STACK_SIZE 64
+
 static void l2_guest_code(void)
 {
 	/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
 		     : : [port] "d" (PORT_L0_EXIT) : "rax");
 }
 
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
 {
-#define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(0);
 }
 
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	/* Prepare the VMCB for L2 execution. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+	if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(data);
+	else
+		l1_svm_code(data);
+}
+
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva;
+	gva_t guest_gva;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+		     kvm_cpu_has(X86_FEATURE_SVM));
 
 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 
-	/* Allocate VMX pages and shared descriptors (vmx_pages). */
-	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &guest_gva);
+	else
+		vcpu_alloc_svm(vm, &guest_gva);
+
+	vcpu_args_set(vcpu, 1, guest_gva);
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
new file mode 100644
index 000000000000..0e67cce83570
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX		1
+
+/*
+ * Allocate four pages total.  Two pages are used to verify that KVM marks
+ * the accessed page/GFN as dirty, but not the "other" page.  Times two
+ * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
+ * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
+ * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
+ * tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
+ */
+#define TEST_MEM_PAGES			4
+
+#define TEST_MEM_BASE			0xc0000000
+#define TEST_MEM_ALIAS_BASE		0xc0002000
+
+#define TEST_GUEST_ADDR(base, idx)	((base) + (idx) * PAGE_SIZE)
+
+#define TEST_GVA(idx)			TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+#define TEST_GPA(idx)			TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+
+#define TEST_ALIAS_GPA(idx)		TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)
+
+#define TEST_HVA(vm, idx)		addr_gpa2hva(vm, TEST_GPA(idx))
+
+#define L2_GUEST_STACK_SIZE 64
+
+/* Use the page offset bits to communicate the access+fault type. */
+#define TEST_SYNC_READ_FAULT		BIT(0)
+#define TEST_SYNC_WRITE_FAULT		BIT(1)
+#define TEST_SYNC_NO_FAULT		BIT(2)
+
+static void l2_guest_code(gva_t base)
+{
+	gva_t page0 = TEST_GUEST_ADDR(base, 0);
+	gva_t page1 = TEST_GUEST_ADDR(base, 1);
+
+	READ_ONCE(*(u64 *)page0);
+	GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT);
+	WRITE_ONCE(*(u64 *)page0, 1);
+	GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT);
+	READ_ONCE(*(u64 *)page0);
+	GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT);
+
+	WRITE_ONCE(*(u64 *)page1, 1);
+	GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+	WRITE_ONCE(*(u64 *)page1, 1);
+	GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+	READ_ONCE(*(u64 *)page1);
+	GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT);
+
+	/* Exit to L1 and never come back.  */
+	vmcall();
+}
+
+static void l2_guest_code_tdp_enabled(void)
+{
+	/*
+	 * Use the aliased virtual addresses when running with TDP to verify
+	 * that KVM correctly handles the case where a page is dirtied via a
+	 * different GPA than would be used by L1.
+	 */
+	l2_guest_code(TEST_MEM_ALIAS_BASE);
+}
+
+static void l2_guest_code_tdp_disabled(void)
+{
+	/*
+	 * Use the "normal" virtual addresses when running without TDP enabled,
+	 * in which case L2 will use the same page tables as L1, and thus needs
+	 * to use the same virtual addresses that are mapped into L1.
+	 */
+	l2_guest_code(TEST_MEM_BASE);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	void *l2_rip;
+
+	GUEST_ASSERT(vmx->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+	GUEST_ASSERT(load_vmcs(vmx));
+
+	if (vmx->eptp_gpa)
+		l2_rip = l2_guest_code_tdp_enabled;
+	else
+		l2_rip = l2_guest_code_tdp_disabled;
+
+	prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(TEST_SYNC_NO_FAULT);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_SYNC(TEST_SYNC_NO_FAULT);
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+	GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	void *l2_rip;
+
+	if (svm->ncr3_gpa)
+		l2_rip = l2_guest_code_tdp_enabled;
+	else
+		l2_rip = l2_guest_code_tdp_disabled;
+
+	generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(TEST_SYNC_NO_FAULT);
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_SYNC(TEST_SYNC_NO_FAULT);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+	GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+	if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(data);
+	else
+		l1_svm_code(data);
+}
+
+static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
+				   unsigned long *bmap)
+{
+	gva_t gva = arg & ~(PAGE_SIZE - 1);
+	int page_nr, i;
+
+	/*
+	 * Extract the page number of underlying physical page, which is also
+	 * the _L1_ page number.  The dirty bitmap _must_ be updated based on
+	 * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA
+	 * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty
+	 * bitmap and which underlying physical page is accessed.
+	 *
+	 * Note, gva will be '0' if there was no access, i.e. if the purpose of
+	 * the sync is to verify all pages are clean.
+	 */
+	if (!gva)
+		page_nr = 0;
+	else if (gva >= TEST_MEM_ALIAS_BASE)
+		page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT;
+	else
+		page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT;
+	TEST_ASSERT(page_nr == 0 || page_nr == 1,
+		    "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
+	TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
+		    "Test bug, gva must be valid if a fault is expected");
+
+	kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+	/*
+	 * Check all pages to verify the correct physical page was modified (or
+	 * not), and that all pages are clean/dirty as expected.
+	 *
+	 * If a fault of any kind is expected, the target page should be dirty
+	 * as the Dirty bit is set in the gPTE.  KVM should create a writable
+	 * SPTE even on a read fault, *and* KVM must mark the GFN as dirty
+	 * when doing so.
+	 */
+	for (i = 0; i < TEST_MEM_PAGES; i++) {
+		if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT))
+			TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
+				    "Page %u incorrectly not written by guest", i);
+		else
+			TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
+				    "Page %u incorrectly written by guest", i);
+
+		if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT))
+			TEST_ASSERT(test_bit(i, bmap),
+				    "Page %u incorrectly reported clean on %s fault",
+				    i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
+		else
+			TEST_ASSERT(!test_bit(i, bmap),
+				    "Page %u incorrectly reported dirty", i);
+	}
+}
+
+static void test_dirty_log(bool nested_tdp)
+{
+	gva_t nested_gva = 0;
+	unsigned long *bmap;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	bool done = false;
+
+	pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled");
+
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	if (nested_tdp)
+		vm_enable_tdp(vm);
+
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &nested_gva);
+	else
+		vcpu_alloc_svm(vm, &nested_gva);
+
+	vcpu_args_set(vcpu, 1, nested_gva);
+
+	/* Add an extra memory slot for testing dirty logging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, TEST_MEM_BASE,
+				    TEST_MEM_SLOT_INDEX, TEST_MEM_PAGES,
+				    KVM_MEM_LOG_DIRTY_PAGES);
+
+	/*
+	 * Add an identity map for GVA range [0xc0000000, 0xc0004000).  This
+	 * affects both L1 and L2.  However...
+	 */
+	virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);
+
+	/*
+	 * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
+	 * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
+	 *
+	 * When TDP is disabled, the L2 guest code will still access the same L1
+	 * GPAs as the TDP enabled case.
+	 *
+	 * Set the Dirty bit in the PTEs used by L2 so that KVM will create
+	 * writable SPTEs when handling read faults (if the Dirty bit isn't
+	 * set, KVM must intercept the next write to emulate the Dirty bit
+	 * update).
+	 */
+	if (nested_tdp) {
+		tdp_identity_map_default_memslots(vm);
+		tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
+		tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);
+
+		*tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+		*tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+	} else {
+		*vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
+		*vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
+	}
+
+	bmap = bitmap_zalloc(TEST_MEM_PAGES);
+
+	while (!done) {
+		memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
+
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			test_handle_ucall_sync(vm, uc.args[1], bmap);
+			break;
+		case UCALL_DONE:
+			done = true;
+			break;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+	free(bmap);
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+
+	test_dirty_log(/*nested_tdp=*/false);
+
+	if (kvm_cpu_has_tdp())
+		test_dirty_log(/*nested_tdp=*/true);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..fb7dcbe53ac7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+	SVM_F,
+	VMX_F,
+	NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+	const char name[32];
+	u8 opcode[15];
+	u32 exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+	{
+		.name = "pause",
+		.opcode = { 0xf3, 0x90 },
+		.exit_reason = { SVM_EXIT_PAUSE,
+				 EXIT_REASON_PAUSE_INSTRUCTION, }
+	},
+	{
+		.name = "hlt",
+		.opcode = { 0xf4 },
+		.exit_reason = { SVM_EXIT_HLT,
+				 EXIT_REASON_HLT, }
+	},
+};
+
+static u8 kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d };	/* ud2 ; .ascii "kvm" */
+static u8 l2_guest_code[sizeof(kvm_fep) + 15];
+static u8 *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static u32 get_instruction_length(struct emulated_instruction *insn)
+{
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+		;
+
+	return i;
+}
+
+static void guest_code(void *test_data)
+{
+	int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+	int i;
+
+	memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+	if (f == SVM_F) {
+		struct svm_test_data *svm = test_data;
+		struct vmcb *vmcb = svm->vmcb;
+
+		generic_svm_setup(svm, NULL, NULL);
+		vmcb->save.idtr.limit = 0;
+		vmcb->save.rip = (u64)l2_guest_code;
+
+		vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+					   BIT_ULL(INTERCEPT_PAUSE) |
+					   BIT_ULL(INTERCEPT_HLT);
+		vmcb->control.intercept_exceptions = 0;
+	} else {
+		GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+		GUEST_ASSERT(load_vmcs(test_data));
+
+		prepare_vmcs(test_data, NULL, NULL);
+		GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+		GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+		GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+						   CPU_BASED_PAUSE_EXITING |
+						   CPU_BASED_HLT_EXITING);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+		struct emulated_instruction *insn = &instructions[i];
+		u32 insn_len = get_instruction_length(insn);
+		u32 exit_insn_len;
+		u32 exit_reason;
+
+		/*
+		 * Copy the target instruction to the L2 code stream, and fill
+		 * the remaining bytes with INT3s so that a missed intercept
+		 * results in a consistent failure mode (SHUTDOWN).
+		 */
+		memcpy(l2_instruction, insn->opcode, insn_len);
+		memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+		if (f == SVM_F) {
+			struct svm_test_data *svm = test_data;
+			struct vmcb *vmcb = svm->vmcb;
+
+			run_guest(vmcb, svm->vmcb_gpa);
+			exit_reason = vmcb->control.exit_code;
+			exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+			GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+		} else {
+			GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+			exit_reason = vmreadz(VM_EXIT_REASON);
+			exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+			GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+		}
+
+		__GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+			       "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+			       insn->exit_reason[f], insn->name, exit_reason);
+
+		__GUEST_ASSERT(exit_insn_len == insn_len,
+			       "Wanted insn_len '%u' for '%s', got '%u'",
+			       insn_len, insn->name, exit_insn_len);
+	}
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	gva_t nested_test_data_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(is_forced_emulation_enabled);
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+	if (kvm_cpu_has(X86_FEATURE_SVM))
+		vcpu_alloc_svm(vm, &nested_test_data_gva);
+	else
+		vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+	vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3641a42934ac..186e980aa8ee 100644
--- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -72,7 +72,7 @@ static void l2_ss_injected_tf_test(void)
 }
 
 static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
-		       uint32_t error_code)
+		       u32 error_code)
 {
 	struct vmcb *vmcb = svm->vmcb;
 	struct vmcb_control_area *ctrl = &vmcb->control;
@@ -111,7 +111,7 @@ static void l1_svm_code(struct svm_test_data *svm)
 	GUEST_DONE();
 }
 
-static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+static void vmx_run_l2(void *l2_code, int vector, u32 error_code)
 {
 	GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
 
@@ -216,7 +216,7 @@ static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
  */
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t nested_test_data_gva;
+	gva_t nested_test_data_gva;
 	struct kvm_vcpu_events events;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..11fd2467d823
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+	vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uintptr_t save_cr3;
+
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* Try to run L2 with invalid CR3 and make sure it fails */
+	save_cr3 = svm->vmcb->save.cr3;
+	svm->vmcb->save.cr3 = -1ull;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+	/* Now restore CR3 and make sure L2 runs successfully */
+	svm->vmcb->save.cr3 = save_cr3;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+	GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uintptr_t save_cr3;
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* Try to run L2 with invalid CR3 and make sure it fails */
+	save_cr3 = vmreadz(GUEST_CR3);
+	vmwrite(GUEST_CR3, -1ull);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+	/* Now restore CR3 and make sure L2 runs successfully */
+	vmwrite(GUEST_CR3, save_cr3);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+	if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(data);
+	else
+		l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	gva_t guest_gva = 0;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+		     kvm_cpu_has(X86_FEATURE_SVM));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &guest_gva);
+	else
+		vcpu_alloc_svm(vm, &guest_gva);
+
+	vcpu_args_set(vcpu, 1, guest_gva);
+
+	for (;;) {
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
index 67a62a5a8895..831380732671 100644
--- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * vmx_set_nested_state_test
- *
  * Copyright (C) 2019, Google LLC.
  *
  * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
@@ -11,6 +9,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "vmx.h"
+#include "svm_util.h"
 
 #include <errno.h>
 #include <linux/kvm.h>
@@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
 	TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
 		    "Size must be between %ld and %d.  The size returned was %d.",
 		    sizeof(*state), state_sz, state->size);
-	TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
-	TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+	TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
+	TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
+	TEST_ASSERT_EQ(state->flags, 0);
+
+	free(state);
+}
+
+static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
+{
+	u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+	vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
+}
+
+static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
+{
+	u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+	vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
+}
+
+void set_default_svm_state(struct kvm_nested_state *state, int size)
+{
+	memset(state, 0, size);
+	state->format = 1;
+	state->size = size;
+	state->hdr.svm.vmcb_pa = 0x3000;
+}
+
+void test_svm_nested_state(struct kvm_vcpu *vcpu)
+{
+	/* Add a page for VMCB. */
+	const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+	struct kvm_nested_state *state =
+		(struct kvm_nested_state *)malloc(state_sz);
+
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
+
+	/* The format must be set to 1. 0 for VMX, 1 for SVM. */
+	set_default_svm_state(state, state_sz);
+	state->format = 0;
+	test_nested_state_expect_einval(vcpu, state);
+
+	/* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
+	set_default_svm_state(state, state_sz);
+	state->flags = KVM_STATE_NESTED_EVMCS;
+	test_nested_state_expect_einval(vcpu, state);
+
+	/*
+	 * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
+	 * cleared.
+	 */
+	vcpu_efer_disable_svm(vcpu);
+
+	set_default_svm_state(state, state_sz);
+	state->flags = KVM_STATE_NESTED_GUEST_MODE;
+	test_nested_state_expect_einval(vcpu, state);
+
+	state->flags = 0;
+	test_nested_state(vcpu, state);
+
+	state->flags = KVM_STATE_NESTED_GIF_SET;
+	test_nested_state(vcpu, state);
+
+	/* Enable SVM in the guest EFER. */
+	vcpu_efer_enable_svm(vcpu);
+
+	/* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
+	set_default_svm_state(state, state_sz);
+	state->hdr.svm.vmcb_pa = -1ull;
+	state->flags = 0;
+	test_nested_state(vcpu, state);
+	state->flags = KVM_STATE_NESTED_GUEST_MODE;
+	test_nested_state_expect_einval(vcpu, state);
+
+	/*
+	 * Size must be large enough to fit kvm_nested_state and VMCB
+	 * only when entering guest mode.
+	 */
+	set_default_svm_state(state, state_sz/2);
+	state->flags = 0;
+	test_nested_state(vcpu, state);
+	state->flags = KVM_STATE_NESTED_GUEST_MODE;
+	test_nested_state_expect_einval(vcpu, state);
+
+	/*
+	 * Test that if we leave nesting the state reflects that when we get it
+	 * again, except for vmcb_pa, which is always returned as 0 when not in
+	 * guest mode.
+	 */
+	set_default_svm_state(state, state_sz);
+	state->hdr.svm.vmcb_pa = -1ull;
+	state->flags = KVM_STATE_NESTED_GIF_SET;
+	test_nested_state(vcpu, state);
+	vcpu_nested_state_get(vcpu, state);
+	TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+		    "Size must be between %ld and %d.  The size returned was %d.",
+		    sizeof(*state), state_sz, state->size);
+
+	TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
+	TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
 
 	free(state);
 }
@@ -255,20 +354,20 @@ int main(int argc, char *argv[])
 
 	have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
 
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+		     kvm_cpu_has(X86_FEATURE_SVM));
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
 
-	/*
-	 * AMD currently does not implement set_nested_state, so for now we
-	 * just early out.
-	 */
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
 	vm = vm_create_with_one_vcpu(&vcpu, NULL);
 
 	/*
-	 * First run tests with VMX disabled to check error handling.
+	 * First run tests with VMX/SVM disabled to check error handling.
+	 * test_{vmx/svm}_nested_state() will re-enable as needed.
 	 */
-	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+	else
+		vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
 
 	/* Passing a NULL kvm_nested_state causes a EFAULT. */
 	test_nested_state_expect_efault(vcpu, NULL);
@@ -297,7 +396,10 @@ int main(int argc, char *argv[])
 	state.flags = KVM_STATE_NESTED_RUN_PENDING;
 	test_nested_state_expect_einval(vcpu, &state);
 
-	test_vmx_nested_state(vcpu);
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		test_vmx_nested_state(vcpu);
+	else
+		test_svm_nested_state(vcpu);
 
 	kvm_vm_free(vm);
 	return 0;
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..f0e4adac4751 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * vmx_tsc_adjust_test
- *
  * Copyright (C) 2018, Google LLC.
  *
  * IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "vmx.h"
+#include "svm_util.h"
 
 #include <string.h>
 #include <sys/ioctl.h>
@@ -35,6 +34,8 @@
 #define TSC_ADJUST_VALUE (1ll << 32)
 #define TSC_OFFSET_VALUE -(1ll << 48)
 
+#define L2_GUEST_STACK_SIZE 64
+
 enum {
 	PORT_ABORT = 0x1000,
 	PORT_REPORT,
@@ -52,9 +53,9 @@ enum {
 /* The virtual machine object. */
 static struct kvm_vm *vm;
 
-static void check_ia32_tsc_adjust(int64_t max)
+static void check_ia32_tsc_adjust(s64 max)
 {
-	int64_t adjust;
+	s64 adjust;
 
 	adjust = rdmsr(MSR_IA32_TSC_ADJUST);
 	GUEST_SYNC(adjust);
@@ -63,7 +64,7 @@ static void check_ia32_tsc_adjust(int64_t max)
 
 static void l2_guest_code(void)
 {
-	uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+	u64 l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
 
 	wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
 	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
@@ -72,46 +73,51 @@ static void l2_guest_code(void)
 	__asm__ __volatile__("vmcall");
 }
 
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
 {
-#define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-	uint32_t control;
-	uintptr_t save_cr3;
 
+	/* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
 	GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
 	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
 	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
 
-	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-	GUEST_ASSERT(load_vmcs(vmx_pages));
-
-	/* Prepare the VMCS for L2 execution. */
-	prepare_vmcs(vmx_pages, l2_guest_code,
-		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
-	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
-	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
-	vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
-	/* Jump into L2.  First, test failure to load guest CR3.  */
-	save_cr3 = vmreadz(GUEST_CR3);
-	vmwrite(GUEST_CR3, -1ull);
-	GUEST_ASSERT(!vmlaunch());
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
-		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
-	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
-	vmwrite(GUEST_CR3, save_cr3);
-
-	GUEST_ASSERT(!vmlaunch());
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	/*
+	 * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+	 * intercepting the write so it should update L1's TSC_ADJUST.
+	 */
+	if (this_cpu_has(X86_FEATURE_VMX)) {
+		struct vmx_pages *vmx_pages = data;
+		u32 control;
+
+		GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+		GUEST_ASSERT(load_vmcs(vmx_pages));
+
+		prepare_vmcs(vmx_pages, l2_guest_code,
+			     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+		control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+		control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+		vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+		GUEST_ASSERT(!vmlaunch());
+		GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	} else {
+		struct svm_test_data *svm = data;
+
+		generic_svm_setup(svm, l2_guest_code,
+				  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+		svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+		run_guest(svm->vmcb, svm->vmcb_gpa);
+		GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	}
 
 	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
 	GUEST_DONE();
 }
 
-static void report(int64_t val)
+static void report(s64 val)
 {
 	pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
 		val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
@@ -119,16 +125,19 @@ static void report(int64_t val)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva;
+	gva_t nested_gva;
 	struct kvm_vcpu *vcpu;
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+		     kvm_cpu_has(X86_FEATURE_SVM));
 
-	vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &nested_gva);
+	else
+		vcpu_alloc_svm(vm, &nested_gva);
 
-	/* Allocate VMX pages and shared descriptors (vmx_pages). */
-	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, nested_gva);
 
 	for (;;) {
 		struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..190e93af20a1 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,12 +13,13 @@
 
 #include "kvm_util.h"
 #include "vmx.h"
+#include "svm_util.h"
 #include "kselftest.h"
 
 /* L2 is scaled up (from L1's perspective) by this factor */
 #define L2_SCALE_FACTOR 4ULL
 
-#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_OFFSET_L2 ((u64)-33125236320908)
 #define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
 
 #define L2_GUEST_STACK_SIZE 64
@@ -34,9 +35,9 @@ enum { USLEEP, UCHECK_L1, UCHECK_L2 };
  * measurements, a difference of 1% between the actual and the expected value
  * is tolerated.
  */
-static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+static void compare_tsc_freq(u64 actual, u64 expected)
 {
-	uint64_t tolerance, thresh_low, thresh_high;
+	u64 tolerance, thresh_low, thresh_high;
 
 	tolerance = expected / 100;
 	thresh_low = expected - tolerance;
@@ -54,7 +55,7 @@ static void compare_tsc_freq(uint64_t actual, uint64_t expected)
 
 static void check_tsc_freq(int level)
 {
-	uint64_t tsc_start, tsc_end, tsc_freq;
+	u64 tsc_start, tsc_end, tsc_freq;
 
 	/*
 	 * Reading the TSC twice with about a second's difference should give
@@ -79,10 +80,33 @@ static void l2_guest_code(void)
 	__asm__ __volatile__("vmcall");
 }
 
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
 {
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-	uint32_t control;
+
+	/* check that L1's frequency looks alright before launching L2 */
+	check_tsc_freq(UCHECK_L1);
+
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* enable TSC scaling for L2 */
+	wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+	/* launch L2 */
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+	/* check that L1's frequency still looks good */
+	check_tsc_freq(UCHECK_L1);
+
+	GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u32 control;
 
 	/* check that L1's frequency looks alright before launching L2 */
 	check_tsc_freq(UCHECK_L1);
@@ -116,20 +140,29 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_DONE();
 }
 
+static void l1_guest_code(void *data)
+{
+	if (this_cpu_has(X86_FEATURE_VMX))
+		l1_vmx_code(data);
+	else
+		l1_svm_code(data);
+}
+
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	vm_vaddr_t vmx_pages_gva;
+	gva_t guest_gva = 0;
 
-	uint64_t tsc_start, tsc_end;
-	uint64_t tsc_khz;
-	uint64_t l1_scale_factor;
-	uint64_t l0_tsc_freq = 0;
-	uint64_t l1_tsc_freq = 0;
-	uint64_t l2_tsc_freq = 0;
+	u64 tsc_start, tsc_end;
+	u64 tsc_khz;
+	u64 l1_scale_factor;
+	u64 l0_tsc_freq = 0;
+	u64 l1_tsc_freq = 0;
+	u64 l2_tsc_freq = 0;
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+		     kvm_cpu_has(X86_FEATURE_SVM));
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
 	TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
 
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
 	printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
 
 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	if (kvm_cpu_has(X86_FEATURE_VMX))
+		vcpu_alloc_vmx(vm, &guest_gva);
+	else
+		vcpu_alloc_svm(vm, &guest_gva);
+
+	vcpu_args_set(vcpu, 1, guest_gva);
 
 	tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
 	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
new file mode 100644
index 000000000000..85d3f4cc76f3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+/*
+ * Allocate two VMCB pages for testing. Both pages have different GVAs (shared
+ * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that:
+ * - L2 GPA == L1 GPA for VMCB0.
+ * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1.
+ *
+ * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is
+ * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA,
+ * depending on which VMCB is accessed.
+ */
+#define TEST_MEM_SLOT_INDEX		1
+#define TEST_MEM_PAGES			2
+#define TEST_MEM_BASE			0xc0000000
+
+#define TEST_GUEST_ADDR(idx)		(TEST_MEM_BASE + (idx) * PAGE_SIZE)
+
+#define TEST_VMCB_L1_GPA(idx)		TEST_GUEST_ADDR(idx)
+#define TEST_VMCB_GVA(idx)		TEST_GUEST_ADDR(idx)
+
+#define TEST_VMCB_L2_GPA		TEST_VMCB_L1_GPA(0)
+
+#define L2_GUEST_STACK_SIZE		64
+
+static void l2_guest_code_vmsave(void)
+{
+	asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmload(void)
+{
+	asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmcb(int vmcb_idx)
+{
+	wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa);
+	l2_guest_code_vmsave();
+
+	/* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */
+	GUEST_SYNC(vmcb_idx);
+
+	l2_guest_code_vmload();
+	GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb);
+
+	/* Reset MSR_KERNEL_GS_BASE */
+	wrmsr(MSR_KERNEL_GS_BASE, 0);
+	l2_guest_code_vmsave();
+
+	vmmcall();
+}
+
+static void l2_guest_code_vmcb0(void)
+{
+	l2_guest_code_vmcb(0);
+}
+
+static void l2_guest_code_vmcb1(void)
+{
+	l2_guest_code_vmcb(1);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	/* Each test case initializes the guest RIP below */
+	generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */
+	svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) |
+					 BIT_ULL(INTERCEPT_VMLOAD));
+
+	/* ..SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE cleared.. */
+	svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+	/* ..and SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE set */
+	svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+	/* Now clear the intercepts to test VMSAVE/VMLOAD behavior */
+	svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) |
+					  BIT_ULL(INTERCEPT_VMLOAD));
+
+	/*
+	 * Without SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be
+	 * interpreted as an L1 GPA, so VMCB0 should be used.
+	 */
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0;
+	svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+	/*
+	 * With SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be interpreted as
+	 * an L2 GPA, and translated through the NPT to VMCB1.
+	 */
+	svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1;
+	svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	gva_t nested_gva = 0;
+	struct vmcb *test_vmcb[2];
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int i;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vm_enable_tdp(vm);
+
+	vcpu_alloc_svm(vm, &nested_gva);
+	vcpu_args_set(vcpu, 1, nested_gva);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    TEST_MEM_BASE, TEST_MEM_SLOT_INDEX,
+				    TEST_MEM_PAGES, 0);
+
+	for (i = 0; i <= 1; i++) {
+		virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1);
+		test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i));
+	}
+
+	tdp_identity_map_default_memslots(vm);
+
+	/*
+	 * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing
+	 * whether the L2 GPA is interpreted as an L1 GPA or translated through
+	 * the NPT.
+	 */
+	TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0));
+	tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE);
+
+	for (;;) {
+		struct ucall uc;
+
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+		case UCALL_SYNC:
+			i = uc.args[1];
+			TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i);
+
+			/*
+			 * Check that only the expected VMCB has KERNEL_GS_BASE
+			 * set to 0xaaaa, and update it to 0xbbbb.
+			 */
+			TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa);
+			TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0);
+			test_vmcb[i]->save.kernel_gs_base = 0xbbbb;
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..70950067b989 100644
--- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -32,7 +32,7 @@
 #define RETURN_OPCODE 0xC3
 
 /* Call the specified memory address. */
-static void guest_do_CALL(uint64_t target)
+static void guest_do_CALL(u64 target)
 {
 	((void (*)(void)) target)();
 }
@@ -46,14 +46,14 @@ static void guest_do_CALL(uint64_t target)
  */
 void guest_code(void)
 {
-	uint64_t hpage_1 = HPAGE_GVA;
-	uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
-	uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+	u64 hpage_1 = HPAGE_GVA;
+	u64 hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+	u64 hpage_3 = hpage_2 + (PAGE_SIZE * 512);
 
-	READ_ONCE(*(uint64_t *)hpage_1);
+	READ_ONCE(*(u64 *)hpage_1);
 	GUEST_SYNC(1);
 
-	READ_ONCE(*(uint64_t *)hpage_2);
+	READ_ONCE(*(u64 *)hpage_2);
 	GUEST_SYNC(2);
 
 	guest_do_CALL(hpage_1);
@@ -62,10 +62,10 @@ void guest_code(void)
 	guest_do_CALL(hpage_3);
 	GUEST_SYNC(4);
 
-	READ_ONCE(*(uint64_t *)hpage_1);
+	READ_ONCE(*(u64 *)hpage_1);
 	GUEST_SYNC(5);
 
-	READ_ONCE(*(uint64_t *)hpage_3);
+	READ_ONCE(*(u64 *)hpage_3);
 	GUEST_SYNC(6);
 }
 
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
 {
 	int actual_pages_2m;
 
-	actual_pages_2m = vm_get_stat(vm, "pages_2m");
+	actual_pages_2m = vm_get_stat(vm, pages_2m);
 
 	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
 		    "Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
 {
 	int actual_splits;
 
-	actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+	actual_splits = vm_get_stat(vm, nx_lpage_splits);
 
 	TEST_ASSERT(actual_splits == expected_splits,
 		    "Unexpected NX huge page split count. Expected %d, got %d",
@@ -107,7 +107,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	uint64_t nr_bytes;
+	u64 nr_bytes;
 	void *hva;
 	int r;
 
diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
index 9cbf283ebc55..80bb07e6531c 100644
--- a/tools/testing/selftests/kvm/x86/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86/platform_info_test.c
@@ -23,8 +23,8 @@
 
 static void guest_code(void)
 {
-	uint64_t msr_platform_info;
-	uint8_t vector;
+	u64 msr_platform_info;
+	u8 vector;
 
 	GUEST_SYNC(true);
 	msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
@@ -42,7 +42,7 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	uint64_t msr_platform_info;
+	u64 msr_platform_info;
 	struct ucall uc;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..dc6afac3aa91 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
 #define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)
 
 /*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
  */
-#define NUM_INSNS_PER_LOOP		3
+#define NUM_INSNS_PER_LOOP		6
 
 /*
  * Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,19 +29,74 @@
 /* Total number of instructions retired within the measured section. */
 #define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
 
+/* Track which architectural events are supported by hardware. */
+static u32 hardware_pmu_arch_events;
 
-static uint8_t kvm_pmu_version;
+static u8 kvm_pmu_version;
 static bool kvm_has_perf_caps;
 
+#define X86_PMU_FEATURE_NULL						\
+({									\
+	struct kvm_x86_pmu_feature feature = {};			\
+									\
+	feature;							\
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+	return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+	struct kvm_x86_pmu_feature gp_event;
+	struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(u8 idx)
+{
+	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+		/*
+		 * Note, the fixed counter for reference cycles is NOT the same as the
+		 * general purpose architectural event.  The fixed counter explicitly
+		 * counts at the same frequency as the TSC, whereas the GP event counts
+		 * at a fixed, but uarch specific, frequency.  Bundle them here for
+		 * simplicity.
+		 */
+		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+		[INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_TOPDOWN_RETIRING_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_LBR_INSERTS_INDEX]		 = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
+	};
+
+	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+	return __intel_event_to_feature[idx];
+}
+
 static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
 						  void *guest_code,
-						  uint8_t pmu_version,
-						  uint64_t perf_capabilities)
+						  u8 pmu_version,
+						  u64 perf_capabilities)
 {
 	struct kvm_vm *vm;
 
 	vm = vm_create_with_one_vcpu(vcpu, guest_code);
 	sync_global_to_guest(vm, kvm_pmu_version);
+	sync_global_to_guest(vm, hardware_pmu_arch_events);
 
 	/*
 	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -77,7 +132,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
 	} while (uc.cmd != UCALL_DONE);
 }
 
-static uint8_t guest_get_pmu_version(void)
+static u8 guest_get_pmu_version(void)
 {
 	/*
 	 * Return the effective PMU version, i.e. the minimum between what KVM
@@ -86,7 +141,7 @@ static uint8_t guest_get_pmu_version(void)
 	 * supported by KVM to verify KVM doesn't freak out and do something
 	 * bizarre with an architecturally valid, but unsupported, version.
 	 */
-	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+	return min_t(u8, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
 }
 
 /*
@@ -98,22 +153,28 @@ static uint8_t guest_get_pmu_version(void)
  * Sanity check that in all cases, the event doesn't count when it's disabled,
  * and that KVM correctly emulates the write of an arbitrary value.
  */
-static void guest_assert_event_count(uint8_t idx,
-				     struct kvm_x86_pmu_feature event,
-				     uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(u8 idx, u32 pmc, u32 pmc_msr)
 {
-	uint64_t count;
+	u64 count;
 
 	count = _rdpmc(pmc);
-	if (!this_pmu_has(event))
+	if (!(hardware_pmu_arch_events & BIT(idx)))
 		goto sanity_checks;
 
 	switch (idx) {
 	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
-		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+		if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+			GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+		else
+			GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
 		break;
 	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
-		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+		/* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+		if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+			GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+		else
+			GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
 		break;
 	case INTEL_ARCH_LLC_REFERENCES_INDEX:
 	case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -123,10 +184,15 @@ static void guest_assert_event_count(uint8_t idx,
 		fallthrough;
 	case INTEL_ARCH_CPU_CYCLES_INDEX:
 	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+	case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
+	case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
 		GUEST_ASSERT_NE(count, 0);
 		break;
 	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
-		GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+	case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
+		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+			       "Expected top-down slots >= %u, got count = %lu",
+			       NUM_INSNS_RETIRED, count);
 		break;
 	default:
 		break;
@@ -160,83 +226,52 @@ do {										\
 	__asm__ __volatile__("wrmsr\n\t"					\
 			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
 			     "1:\n\t"						\
+			     FEP "enter $0, $0\n\t"				\
 			     clflush "\n\t"					\
 			     "mfence\n\t"					\
+			     "mov %[m], %%eax\n\t"				\
+			     FEP "leave\n\t"					\
 			     FEP "loop 1b\n\t"					\
 			     FEP "mov %%edi, %%ecx\n\t"				\
 			     FEP "xor %%eax, %%eax\n\t"				\
 			     FEP "xor %%edx, %%edx\n\t"				\
 			     "wrmsr\n\t"					\
-			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
-				"c"(_msr), "D"(_msr)				\
+			     :: "a"((u32)_value), "d"(_value >> 32),	\
+				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
 	);									\
 } while (0)
 
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)	\
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
 do {										\
-	wrmsr(pmc_msr, 0);							\
+	wrmsr(_pmc_msr, 0);							\
 										\
 	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
 	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush  %[m]", FEP);	\
 	else									\
 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
 										\
-	guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);			\
+	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
 } while (0)
 
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
-				    uint32_t pmc, uint32_t pmc_msr,
-				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+static void __guest_test_arch_event(u8 idx, u32 pmc, u32 pmc_msr,
+				    u32 ctrl_msr, u64 ctrl_msr_value)
 {
-	GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
 
 	if (is_forced_emulation_enabled)
-		GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL						\
-({									\
-	struct kvm_x86_pmu_feature feature = {};			\
-									\
-	feature;							\
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
-	return !(*(u64 *)&event);
+		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
 }
 
-static void guest_test_arch_event(uint8_t idx)
+static void guest_test_arch_event(u8 idx)
 {
-	const struct {
-		struct kvm_x86_pmu_feature gp_event;
-		struct kvm_x86_pmu_feature fixed_event;
-	} intel_event_to_feature[] = {
-		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
-		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
-		/*
-		 * Note, the fixed counter for reference cycles is NOT the same
-		 * as the general purpose architectural event.  The fixed counter
-		 * explicitly counts at the same frequency as the TSC, whereas
-		 * the GP event counts at a fixed, but uarch specific, frequency.
-		 * Bundle them here for simplicity.
-		 */
-		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
-		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
-	};
-
-	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-	uint32_t pmu_version = guest_get_pmu_version();
+	u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+	u32 pmu_version = guest_get_pmu_version();
 	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
 	bool guest_has_perf_global_ctrl = pmu_version >= 2;
 	struct kvm_x86_pmu_feature gp_event, fixed_event;
-	uint32_t base_pmc_msr;
+	u32 base_pmc_msr;
 	unsigned int i;
 
 	/* The host side shouldn't invoke this without a guest PMU. */
@@ -248,13 +283,13 @@ static void guest_test_arch_event(uint8_t idx)
 	else
 		base_pmc_msr = MSR_IA32_PERFCTR0;
 
-	gp_event = intel_event_to_feature[idx].gp_event;
+	gp_event = intel_event_to_feature(idx).gp_event;
 	GUEST_ASSERT_EQ(idx, gp_event.f.bit);
 
 	GUEST_ASSERT(nr_gp_counters);
 
 	for (i = 0; i < nr_gp_counters; i++) {
-		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+		u64 eventsel = ARCH_PERFMON_EVENTSEL_OS |
 				    ARCH_PERFMON_EVENTSEL_ENABLE |
 				    intel_pmu_arch_events[idx];
 
@@ -262,14 +297,14 @@ static void guest_test_arch_event(uint8_t idx)
 		if (guest_has_perf_global_ctrl)
 			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
 
-		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+		__guest_test_arch_event(idx, i, base_pmc_msr + i,
 					MSR_P6_EVNTSEL0 + i, eventsel);
 	}
 
 	if (!guest_has_perf_global_ctrl)
 		return;
 
-	fixed_event = intel_event_to_feature[idx].fixed_event;
+	fixed_event = intel_event_to_feature(idx).fixed_event;
 	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
 		return;
 
@@ -277,7 +312,7 @@ static void guest_test_arch_event(uint8_t idx)
 
 	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
 
-	__guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
 				MSR_CORE_PERF_FIXED_CTR0 + i,
 				MSR_CORE_PERF_GLOBAL_CTRL,
 				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -285,7 +320,7 @@ static void guest_test_arch_event(uint8_t idx)
 
 static void guest_test_arch_events(void)
 {
-	uint8_t i;
+	u8 i;
 
 	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
 		guest_test_arch_event(i);
@@ -293,8 +328,8 @@ static void guest_test_arch_events(void)
 	GUEST_DONE();
 }
 
-static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
-			     uint8_t length, uint8_t unavailable_mask)
+static void test_arch_events(u8 pmu_version, u64 perf_capabilities,
+			     u8 length, u32 unavailable_mask)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -303,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
 	if (!pmu_version)
 		return;
 
+	unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
+				    X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
+
 	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
 					 pmu_version, perf_capabilities);
 
@@ -327,19 +365,19 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
 
 #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector)		\
 __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,			\
-	       "Expected %s on " #insn "(0x%x), got vector %u",			\
-	       expect_gp ? "#GP" : "no fault", msr, vector)			\
+	       "Expected %s on " #insn "(0x%x), got %s",			\
+	       expect_gp ? "#GP" : "no fault", msr, ex_str(vector))		\
 
 #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
-	__GUEST_ASSERT(val == expected_val,					\
+	__GUEST_ASSERT(val == expected,					\
 		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
-		       msr, expected_val, val);
+		       msr, expected, val);
 
-static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
-			     uint64_t expected_val)
+static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success,
+			     u64 expected_val)
 {
-	uint8_t vector;
-	uint64_t val;
+	u8 vector;
+	u64 val;
 
 	vector = rdpmc_safe(rdpmc_idx, &val);
 	GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
@@ -355,19 +393,19 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
 		GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
 }
 
-static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
-				 uint8_t nr_counters, uint32_t or_mask)
+static void guest_rd_wr_counters(u32 base_msr, u8 nr_possible_counters,
+				 u8 nr_counters, u32 or_mask)
 {
 	const bool pmu_has_fast_mode = !guest_get_pmu_version();
-	uint8_t i;
+	u8 i;
 
 	for (i = 0; i < nr_possible_counters; i++) {
 		/*
 		 * TODO: Test a value that validates full-width writes and the
 		 * width of the counters.
 		 */
-		const uint64_t test_val = 0xffff;
-		const uint32_t msr = base_msr + i;
+		const u64 test_val = 0xffff;
+		const u32 msr = base_msr + i;
 
 		/*
 		 * Fixed counters are supported if the counter is less than the
@@ -380,12 +418,12 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
 		 * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
 		 * unsupported, i.e. doesn't #GP and reads back '0'.
 		 */
-		const uint64_t expected_val = expect_success ? test_val : 0;
+		const u64 expected_val = expect_success ? test_val : 0;
 		const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
 				       msr != MSR_P6_PERFCTR1;
-		uint32_t rdpmc_idx;
-		uint8_t vector;
-		uint64_t val;
+		u32 rdpmc_idx;
+		u8 vector;
+		u64 val;
 
 		vector = wrmsr_safe(msr, test_val);
 		GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
@@ -423,9 +461,9 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
 
 static void guest_test_gp_counters(void)
 {
-	uint8_t pmu_version = guest_get_pmu_version();
-	uint8_t nr_gp_counters = 0;
-	uint32_t base_msr;
+	u8 pmu_version = guest_get_pmu_version();
+	u8 nr_gp_counters = 0;
+	u32 base_msr;
 
 	if (pmu_version)
 		nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
@@ -439,7 +477,7 @@ static void guest_test_gp_counters(void)
 	 * counters, of which there are none.
 	 */
 	if (pmu_version > 1) {
-		uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+		u64 global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
 
 		if (nr_gp_counters)
 			GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
@@ -457,8 +495,8 @@ static void guest_test_gp_counters(void)
 	GUEST_DONE();
 }
 
-static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-			     uint8_t nr_gp_counters)
+static void test_gp_counters(u8 pmu_version, u64 perf_capabilities,
+			     u8 nr_gp_counters)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -476,9 +514,9 @@ static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
 
 static void guest_test_fixed_counters(void)
 {
-	uint64_t supported_bitmask = 0;
-	uint8_t nr_fixed_counters = 0;
-	uint8_t i;
+	u64 supported_bitmask = 0;
+	u8 nr_fixed_counters = 0;
+	u8 i;
 
 	/* Fixed counters require Architectural vPMU Version 2+. */
 	if (guest_get_pmu_version() >= 2)
@@ -495,8 +533,8 @@ static void guest_test_fixed_counters(void)
 			     nr_fixed_counters, supported_bitmask);
 
 	for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
-		uint8_t vector;
-		uint64_t val;
+		u8 vector;
+		u64 val;
 
 		if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
 			vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
@@ -523,9 +561,8 @@ static void guest_test_fixed_counters(void)
 	GUEST_DONE();
 }
 
-static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
-				uint8_t nr_fixed_counters,
-				uint32_t supported_bitmask)
+static void test_fixed_counters(u8 pmu_version, u64 perf_capabilities,
+				u8 nr_fixed_counters, u32 supported_bitmask)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -545,40 +582,67 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
 
 static void test_intel_counters(void)
 {
-	uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
-	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
-	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
-	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+	u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+	u8 nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+	u8 pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
 	unsigned int i;
-	uint8_t v, j;
-	uint32_t k;
+	u8 v, j;
+	u32 k;
 
-	const uint64_t perf_caps[] = {
+	const u64 perf_caps[] = {
 		0,
 		PMU_CAP_FW_WRITES,
 	};
 
 	/*
+	 * To keep the total runtime reasonable, test only a handful of select,
+	 * semi-arbitrary values for the mask of unavailable PMU events.  Test
+	 * 0 (all events available) and all ones (no events available) as well
+	 * as alternating bit sequences, e.g. to detect if KVM is checking the
+	 * wrong bit(s).
+	 */
+	const u32 unavailable_masks[] = {
+		0x0,
+		0xffffffffu,
+		0xaaaaaaaau,
+		0x55555555u,
+		0xf0f0f0f0u,
+		0x0f0f0f0fu,
+		0xa0a0a0a0u,
+		0x0a0a0a0au,
+		0x50505050u,
+		0x05050505u,
+	};
+
+	/*
 	 * Test up to PMU v5, which is the current maximum version defined by
 	 * Intel, i.e. is the last version that is guaranteed to be backwards
 	 * compatible with KVM's existing behavior.
 	 */
-	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+	u8 max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
 
 	/*
 	 * Detect the existence of events that aren't supported by selftests.
-	 * This will (obviously) fail any time the kernel adds support for a
-	 * new event, but it's worth paying that price to keep the test fresh.
+	 * This will (obviously) fail any time hardware adds support for a new
+	 * event, but it's worth paying that price to keep the test fresh.
 	 */
-	TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
 		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
-		    nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
 
 	/*
-	 * Force iterating over known arch events regardless of whether or not
-	 * KVM/hardware supports a given event.
+	 * Iterate over known arch events irrespective of KVM/hardware support
+	 * to verify that KVM doesn't reject programming of events just because
+	 * the *architectural* encoding is unsupported.  Track which events are
+	 * supported in hardware; the guest side will validate that supported
+	 * events count correctly, even if *enumeration* of the event is
+	 * unsupported by KVM and/or isn't exposed to the guest.
 	 */
-	nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+		if (this_pmu_has(intel_event_to_feature(i).gp_event))
+			hardware_pmu_arch_events |= BIT(i);
+	}
 
 	for (v = 0; v <= max_pmu_version; v++) {
 		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -587,16 +651,7 @@ static void test_intel_counters(void)
 
 			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
 				v, perf_caps[i]);
-			/*
-			 * To keep the total runtime reasonable, test every
-			 * possible non-zero, non-reserved bitmap combination
-			 * only with the native PMU version and the full bit
-			 * vector length.
-			 */
-			if (v == pmu_version) {
-				for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
-					test_arch_events(v, perf_caps[i], nr_arch_events, k);
-			}
+
 			/*
 			 * Test single bits for all PMU version and lengths up
 			 * the number of events +1 (to verify KVM doesn't do
@@ -604,12 +659,9 @@ static void test_intel_counters(void)
 			 * host length).  Explicitly test a mask of '0' and all
 			 * ones i.e. all events being available and unavailable.
 			 */
-			for (j = 0; j <= nr_arch_events + 1; j++) {
-				test_arch_events(v, perf_caps[i], j, 0);
-				test_arch_events(v, perf_caps[i], j, 0xff);
-
-				for (k = 0; k < nr_arch_events; k++)
-					test_arch_events(v, perf_caps[i], j, BIT(k));
+			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
+				for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
+					test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
 			}
 
 			pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..c1232344fda8 100644
--- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -53,11 +53,11 @@ static const struct __kvm_pmu_event_filter base_event_filter = {
 };
 
 struct {
-	uint64_t loads;
-	uint64_t stores;
-	uint64_t loads_stores;
-	uint64_t branches_retired;
-	uint64_t instructions_retired;
+	u64 loads;
+	u64 stores;
+	u64 loads_stores;
+	u64 branches_retired;
+	u64 instructions_retired;
 } pmc_results;
 
 /*
@@ -75,9 +75,9 @@ static void guest_gp_handler(struct ex_regs *regs)
  *
  * Return on success. GUEST_SYNC(0) on error.
  */
-static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+static void check_msr(u32 msr, u64 bits_to_flip)
 {
-	uint64_t v = rdmsr(msr) ^ bits_to_flip;
+	u64 v = rdmsr(msr) ^ bits_to_flip;
 
 	wrmsr(msr, v);
 	if (rdmsr(msr) != v)
@@ -89,10 +89,10 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip)
 		GUEST_SYNC(-EIO);
 }
 
-static void run_and_measure_loop(uint32_t msr_base)
+static void run_and_measure_loop(u32 msr_base)
 {
-	const uint64_t branches_retired = rdmsr(msr_base + 0);
-	const uint64_t insn_retired = rdmsr(msr_base + 1);
+	const u64 branches_retired = rdmsr(msr_base + 0);
+	const u64 insn_retired = rdmsr(msr_base + 1);
 
 	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
 
@@ -147,7 +147,7 @@ static void amd_guest_code(void)
  * Run the VM to the next GUEST_SYNC(value), and return the value passed
  * to the sync. Any other exit from the guest is fatal.
  */
-static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+static u64 run_vcpu_to_sync(struct kvm_vcpu *vcpu)
 {
 	struct ucall uc;
 
@@ -161,7 +161,7 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
 
 static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
 {
-	uint64_t r;
+	u64 r;
 
 	memset(&pmc_results, 0, sizeof(pmc_results));
 	sync_global_to_guest(vcpu->vm, pmc_results);
@@ -182,7 +182,7 @@ static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
  */
 static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
 {
-	uint64_t r;
+	u64 r;
 
 	vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
 	r = run_vcpu_to_sync(vcpu);
@@ -195,7 +195,7 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
  * Remove the first occurrence of 'event' (if any) from the filter's
  * event list.
  */
-static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, u64 event)
 {
 	bool found = false;
 	int i;
@@ -212,10 +212,12 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
 
 #define ASSERT_PMC_COUNTING_INSTRUCTIONS()						\
 do {											\
-	uint64_t br = pmc_results.branches_retired;					\
-	uint64_t ir = pmc_results.instructions_retired;					\
+	u64 br = pmc_results.branches_retired;					\
+	u64 ir = pmc_results.instructions_retired;					\
+	bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ?		\
+			  br >= NUM_BRANCHES : br == NUM_BRANCHES;			\
 											\
-	if (br && br != NUM_BRANCHES)							\
+	if (br && !br_matched)								\
 		pr_info("%s: Branch instructions retired = %lu (expected %u)\n",	\
 			__func__, br, NUM_BRANCHES);					\
 	TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",		\
@@ -226,8 +228,8 @@ do {											\
 
 #define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()						\
 do {											\
-	uint64_t br = pmc_results.branches_retired;					\
-	uint64_t ir = pmc_results.instructions_retired;					\
+	u64 br = pmc_results.branches_retired;					\
+	u64 ir = pmc_results.instructions_retired;					\
 											\
 	TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",		\
 		    __func__, br);							\
@@ -359,7 +361,8 @@ static bool use_intel_pmu(void)
  */
 static bool use_amd_pmu(void)
 {
-	return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+	return (host_cpu_is_amd && kvm_cpu_family() >= 0x17) ||
+	       host_cpu_is_hygon;
 }
 
 /*
@@ -375,7 +378,7 @@ static bool use_amd_pmu(void)
 
 static bool supports_event_mem_inst_retired(void)
 {
-	uint32_t eax, ebx, ecx, edx;
+	u32 eax, ebx, ecx, edx;
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 	if (x86_family(eax) == 0x6) {
@@ -412,15 +415,15 @@ static bool supports_event_mem_inst_retired(void)
 #define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
 	KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
 
-static void masked_events_guest_test(uint32_t msr_base)
+static void masked_events_guest_test(u32 msr_base)
 {
 	/*
 	 * The actual value of the counters don't determine the outcome of
 	 * the test.  Only that they are zero or non-zero.
 	 */
-	const uint64_t loads = rdmsr(msr_base + 0);
-	const uint64_t stores = rdmsr(msr_base + 1);
-	const uint64_t loads_stores = rdmsr(msr_base + 2);
+	const u64 loads = rdmsr(msr_base + 0);
+	const u64 stores = rdmsr(msr_base + 1);
+	const u64 loads_stores = rdmsr(msr_base + 2);
 	int val;
 
 
@@ -473,7 +476,7 @@ static void amd_masked_events_guest_code(void)
 }
 
 static void run_masked_events_test(struct kvm_vcpu *vcpu,
-				   const uint64_t masked_events[],
+				   const u64 masked_events[],
 				   const int nmasked_events)
 {
 	struct __kvm_pmu_event_filter f = {
@@ -482,7 +485,7 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
 		.flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
 	};
 
-	memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+	memcpy(f.events, masked_events, sizeof(u64) * nmasked_events);
 	test_with_filter(vcpu, &f);
 }
 
@@ -491,12 +494,12 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
 #define ALLOW_LOADS_STORES	BIT(2)
 
 struct masked_events_test {
-	uint64_t intel_events[MAX_TEST_EVENTS];
-	uint64_t intel_event_end;
-	uint64_t amd_events[MAX_TEST_EVENTS];
-	uint64_t amd_event_end;
+	u64 intel_events[MAX_TEST_EVENTS];
+	u64 intel_event_end;
+	u64 amd_events[MAX_TEST_EVENTS];
+	u64 amd_event_end;
 	const char *msg;
-	uint32_t flags;
+	u32 flags;
 };
 
 /*
@@ -579,9 +582,9 @@ const struct masked_events_test test_cases[] = {
 };
 
 static int append_test_events(const struct masked_events_test *test,
-			      uint64_t *events, int nevents)
+			      u64 *events, int nevents)
 {
-	const uint64_t *evts;
+	const u64 *evts;
 	int i;
 
 	evts = use_intel_pmu() ? test->intel_events : test->amd_events;
@@ -600,7 +603,7 @@ static bool bool_eq(bool a, bool b)
 	return a == b;
 }
 
-static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, u64 *events,
 				    int nevents)
 {
 	int ntests = ARRAY_SIZE(test_cases);
@@ -627,7 +630,7 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
 	}
 }
 
-static void add_dummy_events(uint64_t *events, int nevents)
+static void add_dummy_events(u64 *events, int nevents)
 {
 	int i;
 
@@ -647,7 +650,7 @@ static void add_dummy_events(uint64_t *events, int nevents)
 static void test_masked_events(struct kvm_vcpu *vcpu)
 {
 	int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
-	uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+	u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
 
 	/* Run the test cases against a sparse PMU event filter. */
 	run_masked_events_tests(vcpu, events, 0);
@@ -665,8 +668,8 @@ static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
 	return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
 }
 
-static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
-				       uint32_t flags, uint32_t action)
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event,
+				       u32 flags, u32 action)
 {
 	struct __kvm_pmu_event_filter f = {
 		.nevents = 1,
@@ -682,9 +685,9 @@ static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
 
 static void test_filter_ioctl(struct kvm_vcpu *vcpu)
 {
-	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+	u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
 	struct __kvm_pmu_event_filter f;
-	uint64_t e = ~0ul;
+	u64 e = ~0ul;
 	int r;
 
 	/*
@@ -726,7 +729,7 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
 	TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
 }
 
-static void intel_run_fixed_counter_guest_code(uint8_t idx)
+static void intel_run_fixed_counter_guest_code(u8 idx)
 {
 	for (;;) {
 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -742,8 +745,8 @@ static void intel_run_fixed_counter_guest_code(uint8_t idx)
 	}
 }
 
-static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
-					       uint32_t action, uint32_t bitmap)
+static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+					  u32 action, u32 bitmap)
 {
 	struct __kvm_pmu_event_filter f = {
 		.action = action,
@@ -754,9 +757,9 @@ static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
 	return run_vcpu_to_sync(vcpu);
 }
 
-static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
-						   uint32_t action,
-						   uint32_t bitmap)
+static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+					      u32 action,
+					      u32 bitmap)
 {
 	struct __kvm_pmu_event_filter f = base_event_filter;
 
@@ -767,12 +770,12 @@ static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
 	return run_vcpu_to_sync(vcpu);
 }
 
-static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
-					uint8_t nr_fixed_counters)
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, u8 idx,
+					u8 nr_fixed_counters)
 {
 	unsigned int i;
-	uint32_t bitmap;
-	uint64_t count;
+	u32 bitmap;
+	u64 count;
 
 	TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
 		    "Invalid nr_fixed_counters");
@@ -812,10 +815,10 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
 
 static void test_fixed_counter_bitmap(void)
 {
-	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+	u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu;
-	uint8_t idx;
+	u8 idx;
 
 	/*
 	 * Check that pmu_event_filter works as expected when it's applied to
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1d2f5d4fd45d 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -23,13 +23,13 @@
 #include <processor.h>
 
 #define BASE_DATA_SLOT		10
-#define BASE_DATA_GPA		((uint64_t)(1ull << 32))
-#define PER_CPU_DATA_SIZE	((uint64_t)(SZ_2M + PAGE_SIZE))
+#define BASE_DATA_GPA		((u64)(1ull << 32))
+#define PER_CPU_DATA_SIZE	((u64)(SZ_2M + PAGE_SIZE))
 
 /* Horrific macro so that the line info is captured accurately :-( */
 #define memcmp_g(gpa, pattern,  size)								\
 do {												\
-	uint8_t *mem = (uint8_t *)gpa;								\
+	u8 *mem = (u8 *)gpa;									\
 	size_t i;										\
 												\
 	for (i = 0; i < size; i++)								\
@@ -38,7 +38,7 @@ do {												\
 			       pattern, i, gpa + i, mem[i]);					\
 } while (0)
 
-static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+static void memcmp_h(u8 *mem, gpa_t gpa, u8 pattern, size_t size)
 {
 	size_t i;
 
@@ -70,13 +70,13 @@ enum ucall_syncs {
 	SYNC_PRIVATE,
 };
 
-static void guest_sync_shared(uint64_t gpa, uint64_t size,
-			      uint8_t current_pattern, uint8_t new_pattern)
+static void guest_sync_shared(gpa_t gpa, u64 size,
+			      u8 current_pattern, u8 new_pattern)
 {
 	GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
 }
 
-static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+static void guest_sync_private(gpa_t gpa, u64 size, u8 pattern)
 {
 	GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
 }
@@ -86,10 +86,10 @@ static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
 #define MAP_GPA_SHARED		BIT(1)
 #define MAP_GPA_DO_FALLOCATE	BIT(2)
 
-static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+static void guest_map_mem(gpa_t gpa, u64 size, bool map_shared,
 			  bool do_fallocate)
 {
-	uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+	u64 flags = MAP_GPA_SET_ATTRIBUTES;
 
 	if (map_shared)
 		flags |= MAP_GPA_SHARED;
@@ -98,19 +98,19 @@ static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
 	kvm_hypercall_map_gpa_range(gpa, size, flags);
 }
 
-static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+static void guest_map_shared(gpa_t gpa, u64 size, bool do_fallocate)
 {
 	guest_map_mem(gpa, size, true, do_fallocate);
 }
 
-static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+static void guest_map_private(gpa_t gpa, u64 size, bool do_fallocate)
 {
 	guest_map_mem(gpa, size, false, do_fallocate);
 }
 
 struct {
-	uint64_t offset;
-	uint64_t size;
+	u64 offset;
+	u64 size;
 } static const test_ranges[] = {
 	GUEST_STAGE(0, PAGE_SIZE),
 	GUEST_STAGE(0, SZ_2M),
@@ -119,11 +119,11 @@ struct {
 	GUEST_STAGE(SZ_2M, PAGE_SIZE),
 };
 
-static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate)
 {
-	const uint8_t def_p = 0xaa;
-	const uint8_t init_p = 0xcc;
-	uint64_t j;
+	const u8 def_p = 0xaa;
+	const u8 init_p = 0xcc;
+	u64 j;
 	int i;
 
 	/* Memory should be shared by default. */
@@ -134,12 +134,12 @@ static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
 	memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
 
 	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-		uint64_t gpa = base_gpa + test_ranges[i].offset;
-		uint64_t size = test_ranges[i].size;
-		uint8_t p1 = 0x11;
-		uint8_t p2 = 0x22;
-		uint8_t p3 = 0x33;
-		uint8_t p4 = 0x44;
+		gpa_t gpa = base_gpa + test_ranges[i].offset;
+		u64 size = test_ranges[i].size;
+		u8 p1 = 0x11;
+		u8 p2 = 0x22;
+		u8 p3 = 0x33;
+		u8 p4 = 0x44;
 
 		/*
 		 * Set the test region to pattern one to differentiate it from
@@ -214,10 +214,10 @@ skip:
 	}
 }
 
-static void guest_punch_hole(uint64_t gpa, uint64_t size)
+static void guest_punch_hole(gpa_t gpa, u64 size)
 {
 	/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
-	uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+	u64 flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
 
 	kvm_hypercall_map_gpa_range(gpa, size, flags);
 }
@@ -227,9 +227,9 @@ static void guest_punch_hole(uint64_t gpa, uint64_t size)
  * proper conversion.  Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
  * (subsequent fault) should zero memory.
  */
-static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+static void guest_test_punch_hole(u64 base_gpa, bool precise)
 {
-	const uint8_t init_p = 0xcc;
+	const u8 init_p = 0xcc;
 	int i;
 
 	/*
@@ -239,8 +239,8 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
 	guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
 
 	for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
-		uint64_t gpa = base_gpa + test_ranges[i].offset;
-		uint64_t size = test_ranges[i].size;
+		gpa_t gpa = base_gpa + test_ranges[i].offset;
+		u64 size = test_ranges[i].size;
 
 		/*
 		 * Free all memory before each iteration, even for the !precise
@@ -268,7 +268,7 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
 	}
 }
 
-static void guest_code(uint64_t base_gpa)
+static void guest_code(u64 base_gpa)
 {
 	/*
 	 * Run the conversion test twice, with and without doing fallocate() on
@@ -289,8 +289,8 @@ static void guest_code(uint64_t base_gpa)
 static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	uint64_t gpa = run->hypercall.args[0];
-	uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+	gpa_t gpa = run->hypercall.args[0];
+	u64 size = run->hypercall.args[1] * PAGE_SIZE;
 	bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
 	bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
 	bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
@@ -337,7 +337,7 @@ static void *__test_mem_conversions(void *__vcpu)
 		case UCALL_ABORT:
 			REPORT_GUEST_ASSERT(uc);
 		case UCALL_SYNC: {
-			uint64_t gpa  = uc.args[1];
+			gpa_t gpa  = uc.args[1];
 			size_t size = uc.args[2];
 			size_t i;
 
@@ -347,7 +347,7 @@ static void *__test_mem_conversions(void *__vcpu)
 
 			for (i = 0; i < size; i += vm->page_size) {
 				size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
-				uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+				u8 *hva = addr_gpa2hva(vm, gpa + i);
 
 				/* In all cases, the host should observe the shared data. */
 				memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
@@ -366,8 +366,8 @@ static void *__test_mem_conversions(void *__vcpu)
 	}
 }
 
-static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
-				 uint32_t nr_memslots)
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_vcpus,
+				 u32 nr_memslots)
 {
 	/*
 	 * Allocate enough memory so that each vCPU's chunk of memory can be
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	pthread_t threads[KVM_MAX_VCPUS];
 	struct kvm_vm *vm;
-	int memfd, i, r;
+	int memfd, i;
 
 	const struct vm_shape shape = {
 		.mode = VM_MODE_DEFAULT,
@@ -402,7 +402,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
 			   KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
 
 	for (i = 0; i < nr_vcpus; i++) {
-		uint64_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
+		gpa_t gpa =  BASE_DATA_GPA + i * per_cpu_size;
 
 		vcpu_args_set(vcpus[i], 1, gpa);
 
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
 	 * should prevent the VM from being fully destroyed until the last
 	 * reference to the guest_memfd is also put.
 	 */
-	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
-	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
-	r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
-	TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+	kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+	kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
 
 	close(memfd);
 }
@@ -453,8 +450,8 @@ static void usage(const char *cmd)
 int main(int argc, char *argv[])
 {
 	enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
-	uint32_t nr_memslots = 1;
-	uint32_t nr_vcpus = 1;
+	u32 nr_memslots = 1;
+	u32 nr_vcpus = 1;
 	int opt;
 
 	TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
index 13e72fcec8dd..10db9fe6d906 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
@@ -17,17 +17,17 @@
 #define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
 #define EXITS_TEST_SLOT 10
 
-static uint64_t guest_repeatedly_read(void)
+static u64 guest_repeatedly_read(void)
 {
-	volatile uint64_t value;
+	volatile u64 value;
 
 	while (true)
-		value = *((uint64_t *) EXITS_TEST_GVA);
+		value = *((u64 *)EXITS_TEST_GVA);
 
 	return value;
 }
 
-static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+static u32 run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -50,7 +50,7 @@ static void test_private_access_memslot_deleted(void)
 	struct kvm_vcpu *vcpu;
 	pthread_t vm_thread;
 	void *thread_return;
-	uint32_t exit_reason;
+	u32 exit_reason;
 
 	vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
 					   guest_repeatedly_read);
@@ -72,7 +72,7 @@ static void test_private_access_memslot_deleted(void)
 	vm_mem_region_delete(vm, EXITS_TEST_SLOT);
 
 	pthread_join(vm_thread, &thread_return);
-	exit_reason = (uint32_t)(uint64_t)thread_return;
+	exit_reason = (u32)(u64)thread_return;
 
 	TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
 	TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
@@ -86,7 +86,7 @@ static void test_private_access_memslot_not_private(void)
 {
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu;
-	uint32_t exit_reason;
+	u32 exit_reason;
 
 	vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
 					   guest_repeatedly_read);
diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
index 49913784bc82..8e3898646c69 100644
--- a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
@@ -86,11 +86,11 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
 	}
 }
 
-static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+static struct kvm_vm *create_vm(u32 nr_vcpus, u32 bsp_vcpu_id,
 				struct kvm_vcpu *vcpus[])
 {
 	struct kvm_vm *vm;
-	uint32_t i;
+	u32 i;
 
 	vm = vm_create(nr_vcpus);
 
@@ -104,7 +104,7 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
 	return vm;
 }
 
-static void run_vm_bsp(uint32_t bsp_vcpu_id)
+static void run_vm_bsp(u32 bsp_vcpu_id)
 {
 	struct kvm_vcpu *vcpus[2];
 	struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index f4095a3d1278..8e654cc9ab16 100644
--- a/tools/testing/selftests/kvm/x86/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -46,9 +46,9 @@ do {										\
 				X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE |	\
 				X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
 
-static uint64_t calc_supported_cr4_feature_bits(void)
+static u64 calc_supported_cr4_feature_bits(void)
 {
-	uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
+	u64 cr4 = KVM_ALWAYS_ALLOWED_CR4;
 
 	if (kvm_cpu_has(X86_FEATURE_UMIP))
 		cr4 |= X86_CR4_UMIP;
@@ -74,7 +74,7 @@ static uint64_t calc_supported_cr4_feature_bits(void)
 	return cr4;
 }
 
-static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
+static void test_cr_bits(struct kvm_vcpu *vcpu, u64 cr4)
 {
 	struct kvm_sregs sregs;
 	int rc, i;
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..8eeba2327c7c 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,12 +28,13 @@
 int kvm_fd;
 u64 supported_vmsa_features;
 bool have_sev_es;
+bool have_snp;
 
 static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
 {
 	struct kvm_sev_cmd cmd = {
 		.id = cmd_id,
-		.data = (uint64_t)data,
+		.data = (u64)data,
 		.sev_fd = open_sev_dev_path_or_exit(),
 	};
 	int ret;
@@ -83,6 +84,9 @@ void test_vm_types(void)
 	if (have_sev_es)
 		test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
 
+	if (have_snp)
+		test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
 	test_init2_invalid(0, &(struct kvm_sev_init){},
 			   "VM type is KVM_X86_DEFAULT_VM");
 	if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -90,7 +94,7 @@ void test_vm_types(void)
 				   "VM type is KVM_X86_SW_PROTECTED_VM");
 }
 
-void test_flags(uint32_t vm_type)
+void test_flags(u32 vm_type)
 {
 	int i;
 
@@ -100,7 +104,7 @@ void test_flags(uint32_t vm_type)
 			"invalid flag");
 }
 
-void test_features(uint32_t vm_type, uint64_t supported_features)
+void test_features(u32 vm_type, u64 supported_features)
 {
 	int i;
 
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
 		    "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
 		    kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
 
+	have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+	TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+		    "sev-snp: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+		    kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SNP_VM);
+
 	test_vm_types();
 
 	test_flags(KVM_X86_SEV_VM);
 	if (have_sev_es)
 		test_flags(KVM_X86_SEV_ES_VM);
+	if (have_snp)
+		test_flags(KVM_X86_SNP_VM);
 
 	test_features(KVM_X86_SEV_VM, 0);
 	if (have_sev_es)
 		test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+	if (have_snp)
+		test_features(KVM_X86_SNP_VM, supported_vmsa_features);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 0a6dfba3905b..6b0928e69051 100644
--- a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
@@ -36,8 +36,6 @@ static struct kvm_vm *sev_vm_create(bool es)
 
 	sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
 
-	if (es)
-		vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
 	return vm;
 }
 
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index d97816dc476a..1a49ee391586 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -13,21 +13,61 @@
 #include "linux/psp-sev.h"
 #include "sev.h"
 
+static void guest_sev_test_msr(u32 msr)
+{
+	u64 val = rdmsr(msr);
+
+	wrmsr(msr, val);
+	GUEST_ASSERT(val == rdmsr(msr));
+}
+
+#define guest_sev_test_reg(reg)			\
+do {						\
+	u64 val = get_##reg();			\
+						\
+	set_##reg(val);				\
+	GUEST_ASSERT(val == get_##reg());	\
+} while (0)
+
+static void guest_sev_test_regs(void)
+{
+	guest_sev_test_msr(MSR_EFER);
+	guest_sev_test_reg(cr0);
+	guest_sev_test_reg(cr3);
+	guest_sev_test_reg(cr4);
+	guest_sev_test_reg(cr8);
+}
 
 #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
 
+static void guest_snp_code(void)
+{
+	u64 sev_msr = rdmsr(MSR_AMD64_SEV);
+
+	GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+	GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+	GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+	guest_sev_test_regs();
+
+	wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+	vmgexit();
+}
+
 static void guest_sev_es_code(void)
 {
 	/* TODO: Check CPUID after GHCB-based hypercall support is added. */
 	GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
 	GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
 
+	guest_sev_test_regs();
+
 	/*
 	 * TODO: Add GHCB and ucall support for SEV-ES guests.  For now, simply
 	 * force "termination" to signal "done" via the GHCB MSR protocol.
 	 */
 	wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
-	__asm__ __volatile__("rep; vmmcall");
+	vmgexit();
 }
 
 static void guest_sev_code(void)
@@ -35,6 +75,8 @@ static void guest_sev_code(void)
 	GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
 	GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
 
+	guest_sev_test_regs();
+
 	GUEST_DONE();
 }
 
@@ -62,19 +104,19 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
 		abort();
 }
 
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(u32 type, u64 policy)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	vm_vaddr_t gva;
+	gva_t gva;
 	void *hva;
 
 	double x87val = M_PI;
 	struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
 
-	vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
-	gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
-				    MEM_REGION_TEST_DATA);
+	vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
+	gva = vm_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+			      MEM_REGION_TEST_DATA);
 	hva = addr_gva2hva(vm, gva);
 
 	vcpu_args_set(vcpu, 1, gva);
@@ -89,10 +131,10 @@ static void test_sync_vmsa(uint32_t policy)
 	    : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
 	vcpu_xsave_set(vcpu, &xsave);
 
-	vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+	vm_sev_launch(vm, policy, NULL);
 
 	/* This page is shared, so make it decrypted.  */
-	memset(hva, 0, 4096);
+	memset(hva, 0, PAGE_SIZE);
 
 	vcpu_run(vcpu);
 
@@ -108,14 +150,12 @@ static void test_sync_vmsa(uint32_t policy)
 	kvm_vm_free(vm);
 }
 
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, u32 type, u64 policy)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
 
-	uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
 	vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
 
 	/* TODO: Validate the measurement is as expected. */
@@ -124,7 +164,7 @@ static void test_sev(void *guest_code, uint64_t policy)
 	for (;;) {
 		vcpu_run(vcpu);
 
-		if (policy & SEV_POLICY_ES) {
+		if (is_sev_es_vm(vm)) {
 			TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
 				    "Wanted SYSTEM_EVENT, got %s",
 				    exit_reason_str(vcpu->run->exit_reason));
@@ -161,16 +201,14 @@ static void guest_shutdown_code(void)
 	__asm__ __volatile__("ud2");
 }
 
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(u32 type, u64 policy)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 
-	uint32_t type = KVM_X86_SEV_ES_VM;
-
 	vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
 
-	vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+	vm_sev_launch(vm, policy, NULL);
 
 	vcpu_run(vcpu);
 	TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -180,27 +218,42 @@ static void test_sev_es_shutdown(void)
 	kvm_vm_free(vm);
 }
 
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, u32 type, u64 policy)
 {
 	const u64 xf_mask = XFEATURE_MASK_X87_AVX;
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
-	test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
-	test_sev(guest_sev_code, 0);
+	if (type == KVM_X86_SNP_VM)
+		test_sev(guest, type, policy | SNP_POLICY_DBG);
+	else
+		test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+	test_sev(guest, type, policy);
 
-	if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
-		test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
-		test_sev(guest_sev_es_code, SEV_POLICY_ES);
+	if (type == KVM_X86_SEV_VM)
+		return;
 
-		test_sev_es_shutdown();
+	test_sev_shutdown(type, policy);
 
-		if (kvm_has_cap(KVM_CAP_XCRS) &&
-		    (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
-			test_sync_vmsa(0);
-			test_sync_vmsa(SEV_POLICY_NO_DBG);
-		}
+	if (kvm_has_cap(KVM_CAP_XCRS) &&
+	    (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+		test_sync_vmsa(type, policy);
+		if (type == KVM_X86_SNP_VM)
+			test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+		else
+			test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
 	}
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+	test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+	if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+		test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+	if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+		test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
 
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..3dca85e95478 100644
--- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
@@ -20,8 +20,8 @@
 
 static void guest_code(bool tdp_enabled)
 {
-	uint64_t error_code;
-	uint64_t vector;
+	u64 error_code;
+	u64 vector;
 
 	vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
 
@@ -47,9 +47,8 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
-	uint64_t *pte;
-	uint64_t *hva;
-	uint64_t gpa;
+	u64 *hva;
+	gpa_t gpa;
 	int rc;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
@@ -73,8 +72,7 @@ int main(int argc, char *argv[])
 	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
 	memset(hva, 0, PAGE_SIZE);
 
-	pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
-	*pte |= BIT_ULL(MAXPHYADDR);
+	*vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR);
 
 	vcpu_run(vcpu);
 
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..740051167dbd 100644
--- a/tools/testing/selftests/kvm/x86/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -14,13 +14,11 @@
 #include "test_util.h"
 
 #include "kvm_util.h"
+#include "smm.h"
 
 #include "vmx.h"
 #include "svm_util.h"
 
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
 #define SMRAM_GPA 0x1000000
 #define SMRAM_STAGE 0xfe
 
@@ -36,13 +34,13 @@
  * independent subset of asm here.
  * SMI handler always report back fixed stage SMRAM_STAGE.
  */
-uint8_t smi_handler[] = {
+u8 smi_handler[] = {
 	0xb0, SMRAM_STAGE,    /* mov $SMRAM_STAGE, %al */
 	0xe4, SYNC_PORT,      /* in $SYNC_PORT, %al */
 	0x0f, 0xaa,           /* rsm */
 };
 
-static inline void sync_with_host(uint64_t phase)
+static inline void sync_with_host(u64 phase)
 {
 	asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
 		     : "+a" (phase));
@@ -67,7 +65,7 @@ static void guest_code(void *arg)
 {
 	#define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+	u64 apicbase = rdmsr(MSR_IA32_APICBASE);
 	struct svm_test_data *svm = arg;
 	struct vmx_pages *vmx_pages = arg;
 
@@ -113,21 +111,9 @@ static void guest_code(void *arg)
 	sync_with_host(DONE);
 }
 
-void inject_smi(struct kvm_vcpu *vcpu)
-{
-	struct kvm_vcpu_events events;
-
-	vcpu_events_get(vcpu, &events);
-
-	events.smi.pending = 1;
-	events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
-	vcpu_events_set(vcpu, &events);
-}
-
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t nested_gva = 0;
+	gva_t nested_gva = 0;
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_regs regs;
@@ -140,16 +126,7 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
-				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);
-	TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
-		    == SMRAM_GPA, "could not allocate guest physical addresses?");
-
-	memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
-	memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
-	       sizeof(smi_handler));
-
-	vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+	setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
 
 	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
 		if (kvm_cpu_has(X86_FEATURE_SVM))
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..409c6cc9f921 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -26,7 +26,9 @@ void svm_l2_guest_code(void)
 	GUEST_SYNC(4);
 	/* Exit to L1 */
 	vmcall();
+	clgi();
 	GUEST_SYNC(6);
+	stgi();
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
@@ -41,6 +43,8 @@ static void svm_l1_guest_code(struct svm_test_data *svm)
 	generic_svm_setup(svm, svm_l2_guest_code,
 			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
+	vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK);
+
 	GUEST_SYNC(3);
 	run_guest(vmcb, svm->vmcb_gpa);
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
@@ -140,8 +144,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
 	GUEST_SYNC(1);
 
 	if (this_cpu_has(X86_FEATURE_XSAVE)) {
-		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
-		uint8_t buffer[4096];
+		u64 supported_xcr0 = this_cpu_supported_xcr0();
+		u8 buffer[PAGE_SIZE];
 
 		memset(buffer, 0xcc, sizeof(buffer));
 
@@ -168,8 +172,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
 		}
 
 		if (this_cpu_has(X86_FEATURE_MPX)) {
-			uint64_t bounds[2] = { 10, 0xffffffffull };
-			uint64_t output[2] = { };
+			u64 bounds[2] = { 10, 0xffffffffull };
+			u64 output[2] = { };
 
 			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
 			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
@@ -222,10 +226,39 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
 	GUEST_DONE();
 }
 
+void svm_check_nested_state(int stage, struct kvm_x86_state *state)
+{
+	struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm;
+
+	if (kvm_cpu_has(X86_FEATURE_VGIF)) {
+		if (stage == 4)
+			TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1);
+		if (stage == 6)
+			TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0);
+	}
+
+	if (kvm_cpu_has(X86_FEATURE_NRIPS)) {
+		/*
+		 * GUEST_SYNC() causes IO emulation in KVM, in which case the
+		 * RIP is advanced before exiting to userspace. Hence, the RIP
+		 * in the saved state should be the same as nRIP saved by the
+		 * CPU in the VMCB.
+		 */
+		if (stage == 6)
+			TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip);
+	}
+}
+
+void check_nested_state(int stage, struct kvm_x86_state *state)
+{
+	if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM))
+		svm_check_nested_state(stage, state);
+}
+
 int main(int argc, char *argv[])
 {
-	uint64_t *xstate_bv, saved_xstate_bv;
-	vm_vaddr_t nested_gva = 0;
+	u64 *xstate_bv, saved_xstate_bv;
+	gva_t nested_gva = 0;
 	struct kvm_cpuid2 empty_cpuid = {};
 	struct kvm_regs regs1, regs2;
 	struct kvm_vcpu *vcpu, *vcpuN;
@@ -278,6 +311,8 @@ int main(int argc, char *argv[])
 
 		kvm_vm_release(vm);
 
+		check_nested_state(stage, state);
+
 		/* Restore state in a new VM.  */
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
@@ -296,7 +331,7 @@ int main(int argc, char *argv[])
 		 * supported features, even if something goes awry in saving
 		 * the original snapshot.
 		 */
-		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+		xstate_bv = (void *)&((u8 *)state->xsave->region)[512];
 		saved_xstate_bv = *xstate_bv;
 
 		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..d3cc5e4f7883 100644
--- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
 	x2apic_write_reg(APIC_ICR,
 		APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
 
-	__asm__ __volatile__(
-		"sti\n"
-		"nop\n"
-	);
+	sti_nop();
 
 	GUEST_ASSERT(vintr_irq_called);
 	GUEST_ASSERT(intr_irq_called);
@@ -85,7 +82,7 @@ static void l1_guest_code(struct svm_test_data *svm)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	vm_vaddr_t svm_gva;
+	gva_t svm_gva;
 	struct kvm_vm *vm;
 	struct ucall uc;
 
diff --git a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
new file mode 100644
index 000000000000..7fbfaa054c95
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define DO_BRANCH() do { asm volatile("jmp 1f\n 1: nop"); } while (0)
+
+struct lbr_branch {
+	u64 from, to;
+};
+
+volatile struct lbr_branch l2_branch;
+
+#define RECORD_AND_CHECK_BRANCH(b)					\
+do {									\
+	wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);			\
+	DO_BRANCH();							\
+	(b)->from = rdmsr(MSR_IA32_LASTBRANCHFROMIP);			\
+	(b)->to = rdmsr(MSR_IA32_LASTBRANCHTOIP);			\
+	/* Disable LBR right after to avoid overriding the IPs */	\
+	wrmsr(MSR_IA32_DEBUGCTLMSR, 0);					\
+									\
+	GUEST_ASSERT_NE((b)->from, 0);					\
+	GUEST_ASSERT_NE((b)->to, 0);					\
+} while (0)
+
+#define CHECK_BRANCH_MSRS(b)						\
+do {									\
+	GUEST_ASSERT_EQ((b)->from, rdmsr(MSR_IA32_LASTBRANCHFROMIP));	\
+	GUEST_ASSERT_EQ((b)->to, rdmsr(MSR_IA32_LASTBRANCHTOIP));	\
+} while (0)
+
+#define CHECK_BRANCH_VMCB(b, vmcb)					\
+do {									\
+	GUEST_ASSERT_EQ((b)->from, vmcb->save.br_from);			\
+	GUEST_ASSERT_EQ((b)->to, vmcb->save.br_to);			\
+} while (0)
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+	/* Record a branch, trigger save/restore, and make sure LBRs are intact */
+	RECORD_AND_CHECK_BRANCH(&l2_branch);
+	GUEST_SYNC(true);
+	CHECK_BRANCH_MSRS(&l2_branch);
+	vmmcall();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, bool nested_lbrv)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+	struct lbr_branch l1_branch;
+
+	/* Record a branch in L1, sync to the host, and check the LBR MSRs again */
+	RECORD_AND_CHECK_BRANCH(&l1_branch);
+	GUEST_SYNC(true);
+	CHECK_BRANCH_MSRS(&l1_branch);
+
+	/* Run L2, which records its own branch into l2_branch and syncs as well */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	if (nested_lbrv)
+		vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_LBR;
+	else
+		vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_LBR;
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+	/* Trigger save/restore one more time before checking, just for kicks */
+	GUEST_SYNC(true);
+
+	/*
+	 * If SVM_MISC2_ENABLE_V_LBR is set, L1 and L2 should have separate LBR
+	 * MSRs, so expect L1's LBRs to remain intact and L2's LBRs to be in the
+	 * VMCB.  Otherwise, the MSRs are shared between L1 & L2, so expect L2's.
+	 */
+	if (nested_lbrv) {
+		CHECK_BRANCH_MSRS(&l1_branch);
+		CHECK_BRANCH_VMCB(&l2_branch, vmcb);
+	} else {
+		CHECK_BRANCH_MSRS(&l2_branch);
+	}
+	GUEST_DONE();
+}
+
+static void test_lbrv_nested_state(bool nested_lbrv)
+{
+	struct kvm_x86_state *state = NULL;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	gva_t svm_gva;
+
+	pr_info("Testing with nested LBRV %s\n", nested_lbrv ? "enabled" : "disabled");
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+	vcpu_alloc_svm(vm, &svm_gva);
+	vcpu_args_set(vcpu, 2, svm_gva, nested_lbrv);
+
+	for (;;) {
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			/* On every sync: save vCPU state, recreate the VM, reload the state */
+			pr_info("Guest triggered save/restore.\n");
+			state = vcpu_save_state(vcpu);
+			kvm_vm_release(vm);
+			vcpu = vm_recreate_with_one_vcpu(vm);
+			vcpu_load_state(vcpu, state);
+			kvm_x86_state_cleanup(state);
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+done:
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+	TEST_REQUIRE(kvm_is_lbrv_enabled());
+
+	test_lbrv_nested_state(/*nested_lbrv=*/false);
+	test_lbrv_nested_state(/*nested_lbrv=*/true);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
new file mode 100644
index 000000000000..6a89eaffc657
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+	unsigned long efer = rdmsr(MSR_EFER);
+
+	/* generic_svm_setup() is expected to leave EFER.SVME set for L2 */
+	GUEST_ASSERT(efer & EFER_SVME);
+	wrmsr(MSR_EFER, efer & ~EFER_SVME);
+
+	/* Unreachable, clearing EFER.SVME in L2 should shut down L1 */
+	GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+
+	/* Unreachable, L1 should have been shut down before L2 exits */
+	GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	gva_t nested_gva = 0;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+	vcpu_alloc_svm(vm, &nested_gva);
+	vcpu_args_set(vcpu, 1, nested_gva);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
index 00135cbba35e..c6ea3d609a62 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
@@ -42,7 +42,7 @@ static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	vm_vaddr_t svm_gva;
+	gva_t svm_gva;
 	struct kvm_vm *vm;
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..f72f11d4c4f8 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -76,7 +76,7 @@ static void l2_guest_code_nmi(void)
 	ud2();
 }
 
-static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+static void l1_guest_code(struct svm_test_data *svm, u64 is_nmi, u64 idt_alt)
 {
 	#define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
 
 	run_guest(vmcb, svm->vmcb_gpa);
 	__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
-		       "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+		       "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
 		       vmcb->control.exit_code,
 		       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
 
 	run_guest(vmcb, svm->vmcb_gpa);
 	__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
-		       "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+		       "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
 		       vmcb->control.exit_code,
 		       vmcb->control.exit_info_1, vmcb->control.exit_info_2);
 
@@ -144,8 +144,8 @@ static void run_test(bool is_nmi)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	vm_vaddr_t svm_gva;
-	vm_vaddr_t idt_alt_vm;
+	gva_t svm_gva;
+	gva_t idt_alt_vm;
 	struct kvm_guest_debug debug;
 
 	pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
@@ -161,14 +161,14 @@ static void run_test(bool is_nmi)
 	if (!is_nmi) {
 		void *idt, *idt_alt;
 
-		idt_alt_vm = vm_vaddr_alloc_page(vm);
+		idt_alt_vm = vm_alloc_page(vm);
 		idt_alt = addr_gva2hva(vm, idt_alt_vm);
 		idt = addr_gva2hva(vm, vm->arch.idt);
 		memcpy(idt_alt, idt, getpagesize());
 	} else {
 		idt_alt_vm = 0;
 	}
-	vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+	vcpu_args_set(vcpu, 3, svm_gva, (u64)is_nmi, (u64)idt_alt_vm);
 
 	memset(&debug, 0, sizeof(debug));
 	vcpu_guest_debug_set(vcpu, &debug);
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
new file mode 100644
index 000000000000..a4935ce2fb99
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+#include "kvm_test_harness.h"
+#include "test_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define SYNC_GP 101
+#define SYNC_L2_STARTED 102
+
+static unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	GUEST_SYNC(SYNC_GP);
+}
+
+static void l2_code(void)
+{
+	GUEST_SYNC(SYNC_L2_STARTED);
+	vmcall();
+}
+
+static void l1_vmrun(struct svm_test_data *svm, gpa_t gpa)
+{
+	generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	asm volatile ("vmrun %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmload(struct svm_test_data *svm, gpa_t gpa)
+{
+	generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	asm volatile ("vmload %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmsave(struct svm_test_data *svm, gpa_t gpa)
+{
+	generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	asm volatile ("vmsave %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmexit(struct svm_test_data *svm, gpa_t gpa)
+{
+	generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	run_guest(svm->vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_DONE();
+}
+
+static u64 unmappable_gpa(struct kvm_vcpu *vcpu)
+{
+	struct userspace_mem_region *region;
+	u64 region_gpa_end, vm_gpa_end = 0;
+	int i;
+
+	hash_for_each(vcpu->vm->regions.slot_hash, i, region, slot_node) {
+		region_gpa_end = region->region.guest_phys_addr + region->region.memory_size;
+		vm_gpa_end = max(vm_gpa_end, region_gpa_end);
+	}
+
+	return vm_gpa_end;
+}
+
+static void test_invalid_vmcb12(struct kvm_vcpu *vcpu)
+{
+	gva_t nested_gva = 0;
+	struct ucall uc;
+
+	/* Pass GPA -1ULL; the guest's #GP handler is expected to sync SYNC_GP. */
+	vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+	vcpu_alloc_svm(vcpu->vm, &nested_gva);
+	vcpu_args_set(vcpu, 2, nested_gva, -1ULL);
+	vcpu_run(vcpu);
+
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+	TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	TEST_ASSERT_EQ(uc.args[1], SYNC_GP);
+}
+
+static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu)
+{
+	gva_t nested_gva = 0;
+
+	vcpu_alloc_svm(vcpu->vm, &nested_gva);
+	vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+	vcpu_run(vcpu);
+
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+	TEST_ASSERT_EQ(vcpu->run->emulation_failure.suberror, KVM_INTERNAL_ERROR_EMULATION);
+}
+
+static void test_unmappable_vmcb12_vmexit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_x86_state *state;
+	gva_t nested_gva = 0;
+	struct ucall uc;
+
+	/*
+	 * Enter L2 (with a legit vmcb12 GPA), then overwrite vmcb12 GPA with an
+	 * unmappable GPA. KVM will fail to map vmcb12 on nested VM-Exit and
+	 * cause a shutdown.
+	 */
+	vcpu_alloc_svm(vcpu->vm, &nested_gva);
+	vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+	TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	TEST_ASSERT_EQ(uc.args[1], SYNC_L2_STARTED);
+
+	state = vcpu_save_state(vcpu);
+	state->nested.hdr.svm.vmcb_pa = unmappable_gpa(vcpu);
+	vcpu_load_state(vcpu, state);
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+	kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmcb12_gpa);
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_invalid, l1_vmrun)
+{
+	test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_invalid, l1_vmload)
+{
+	test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_invalid, l1_vmsave)
+{
+	test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_unmappable, l1_vmrun)
+{
+	test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_unmappable, l1_vmload)
+{
+	test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_unmappable, l1_vmsave)
+{
+	test_unmappable_vmcb12(vcpu);
+}
+
+/*
+ * Invalid vmcb12_gpa cannot be tested for #VMEXIT as KVM_SET_NESTED_STATE will
+ * reject it.
+ */
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmexit_unmappable, l1_vmexit)
+{
+	test_unmappable_vmcb12_vmexit(vcpu);
+}
+
+int main(int argc, char *argv[])
+{
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
index 8a62cca28cfb..b1887242f3b8 100644
--- a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
@@ -36,7 +36,7 @@ static void l1_guest_code(struct svm_test_data *svm)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	vm_vaddr_t svm_gva;
+	gva_t svm_gva;
 	struct kvm_vm *vm;
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
index 8fa3948b0170..e0c52321f87c 100644
--- a/tools/testing/selftests/kvm/x86/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
@@ -20,7 +20,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
+#define UCALL_PIO_PORT ((u16)0x1000)
 
 struct ucall uc_none = {
 	.cmd = UCALL_NONE,
diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
index 56306a19144a..f1c488e0d497 100644
--- a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
@@ -72,13 +72,13 @@ int main(void)
 
 
 	if (has_vmx) {
-		vm_vaddr_t vmx_pages_gva;
+		gva_t vmx_pages_gva;
 
 		vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
 		vcpu_alloc_vmx(vm, &vmx_pages_gva);
 		vcpu_args_set(vcpu, 1, vmx_pages_gva);
 	} else {
-		vm_vaddr_t svm_gva;
+		gva_t svm_gva;
 
 		vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
 		vcpu_alloc_svm(vm, &svm_gva);
diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
index 12b0964f4f13..91583969a14f 100644
--- a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
@@ -95,7 +95,7 @@ int main(void)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	uint64_t val;
+	u64 val;
 
 	ksft_print_header();
 	ksft_set_plan(5);
diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
index 59c7304f805e..59da8d4da607 100644
--- a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
@@ -21,10 +21,10 @@ pthread_spinlock_t create_lock;
 #define TEST_TSC_KHZ    2345678UL
 #define TEST_TSC_OFFSET 200000000
 
-uint64_t tsc_sync;
+u64 tsc_sync;
 static void guest_code(void)
 {
-	uint64_t start_tsc, local_tsc, tmp;
+	u64 start_tsc, local_tsc, tmp;
 
 	start_tsc = rdtsc();
 	do {
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..df1ec8209c76 100644
--- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -45,7 +45,7 @@
 
 #define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
 
-static uint64_t supported_mcg_caps;
+static u64 supported_mcg_caps;
 
 /*
  * Record states about the injected UCNA.
@@ -53,30 +53,30 @@ static uint64_t supported_mcg_caps;
  * handler. Variables without the 'i_' prefixes are recorded in guest main
  * execution thread.
  */
-static volatile uint64_t i_ucna_rcvd;
-static volatile uint64_t i_ucna_addr;
-static volatile uint64_t ucna_addr;
-static volatile uint64_t ucna_addr2;
+static volatile u64 i_ucna_rcvd;
+static volatile u64 i_ucna_addr;
+static volatile u64 ucna_addr;
+static volatile u64 ucna_addr2;
 
 struct thread_params {
 	struct kvm_vcpu *vcpu;
-	uint64_t *p_i_ucna_rcvd;
-	uint64_t *p_i_ucna_addr;
-	uint64_t *p_ucna_addr;
-	uint64_t *p_ucna_addr2;
+	u64 *p_i_ucna_rcvd;
+	u64 *p_i_ucna_addr;
+	u64 *p_ucna_addr;
+	u64 *p_ucna_addr2;
 };
 
 static void verify_apic_base_addr(void)
 {
-	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-	uint64_t base = GET_APIC_BASE(msr);
+	u64 msr = rdmsr(MSR_IA32_APICBASE);
+	u64 base = GET_APIC_BASE(msr);
 
 	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
 }
 
 static void ucna_injection_guest_code(void)
 {
-	uint64_t ctl2;
+	u64 ctl2;
 	verify_apic_base_addr();
 	xapic_enable();
 
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
 
 	/* Enables interrupt in guest. */
-	asm volatile("sti");
+	sti();
 
 	/* Let user space inject the first UCNA */
 	GUEST_SYNC(SYNC_FIRST_UCNA);
@@ -106,7 +106,7 @@ static void ucna_injection_guest_code(void)
 
 static void cmci_disabled_guest_code(void)
 {
-	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
 
 	GUEST_DONE();
@@ -114,7 +114,7 @@ static void cmci_disabled_guest_code(void)
 
 static void cmci_enabled_guest_code(void)
 {
-	uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+	u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
 
 	GUEST_DONE();
@@ -145,14 +145,15 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
 	printf("vCPU received GP in guest.\n");
 }
 
-static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+static void inject_ucna(struct kvm_vcpu *vcpu, u64 addr)
+{
 	/*
 	 * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
 	 * the IA32_MCi_STATUS register.
 	 * MSCOD=1 (BIT[16] - MscodDataRdErr).
 	 * MCACOD=0x0090 (Memory controller error format, channel 0)
 	 */
-	uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+	u64 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
 			  MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
 	struct kvm_x86_mce mce = {};
 	mce.status = status;
@@ -216,10 +217,10 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par
 {
 	struct kvm_vm *vm = vcpu->vm;
 	params->vcpu = vcpu;
-	params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
-	params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
-	params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
-	params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+	params->p_i_ucna_rcvd = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_rcvd);
+	params->p_i_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_addr);
+	params->p_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr);
+	params->p_ucna_addr2 = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr2);
 
 	run_ucna_injection(params);
 
@@ -242,7 +243,7 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par
 
 static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
 {
-	uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+	u64 mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
 	if (enable_cmci_p)
 		mcg_caps |= MCG_CMCI_P;
 
@@ -250,7 +251,7 @@ static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
 	vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
 }
 
-static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, u32 vcpuid,
 						 bool enable_cmci_p, void *guest_code)
 {
 	struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..9c5a87576c2e 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -10,7 +10,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+static void guest_ins_port80(u8 *buffer, unsigned int count)
 {
 	unsigned long end;
 
@@ -26,7 +26,7 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count)
 
 static void guest_code(void)
 {
-	uint8_t buffer[8192];
+	u8 buffer[8192];
 	int i;
 
 	/*
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
 			regs.rcx = 1;
 		if (regs.rcx == 3)
 			regs.rcx = 8192;
-		memset((void *)run + run->io.data_offset, 0xaa, 4096);
+		memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
 		vcpu_regs_set(vcpu, &regs);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..2808ce727e5f 100644
--- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -23,21 +23,21 @@ struct kvm_msr_filter filter_allow = {
 			.nmsrs = 1,
 			/* Test an MSR the kernel knows about. */
 			.base = MSR_IA32_XSS,
-			.bitmap = (uint8_t*)&deny_bits,
+			.bitmap = (u8 *)&deny_bits,
 		}, {
 			.flags = KVM_MSR_FILTER_READ |
 				 KVM_MSR_FILTER_WRITE,
 			.nmsrs = 1,
 			/* Test an MSR the kernel doesn't know about. */
 			.base = MSR_IA32_FLUSH_CMD,
-			.bitmap = (uint8_t*)&deny_bits,
+			.bitmap = (u8 *)&deny_bits,
 		}, {
 			.flags = KVM_MSR_FILTER_READ |
 				 KVM_MSR_FILTER_WRITE,
 			.nmsrs = 1,
 			/* Test a fabricated MSR that no one knows about. */
 			.base = MSR_NON_EXISTENT,
-			.bitmap = (uint8_t*)&deny_bits,
+			.bitmap = (u8 *)&deny_bits,
 		},
 	},
 };
@@ -49,7 +49,7 @@ struct kvm_msr_filter filter_fs = {
 			.flags = KVM_MSR_FILTER_READ,
 			.nmsrs = 1,
 			.base = MSR_FS_BASE,
-			.bitmap = (uint8_t*)&deny_bits,
+			.bitmap = (u8 *)&deny_bits,
 		},
 	},
 };
@@ -61,12 +61,12 @@ struct kvm_msr_filter filter_gs = {
 			.flags = KVM_MSR_FILTER_READ,
 			.nmsrs = 1,
 			.base = MSR_GS_BASE,
-			.bitmap = (uint8_t*)&deny_bits,
+			.bitmap = (u8 *)&deny_bits,
 		},
 	},
 };
 
-static uint64_t msr_non_existent_data;
+static u64 msr_non_existent_data;
 static int guest_exception_count;
 static u32 msr_reads, msr_writes;
 
@@ -77,7 +77,7 @@ static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
 static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
 static u8 bitmap_deadbeef[1] = { 0x1 };
 
-static void deny_msr(uint8_t *bitmap, u32 msr)
+static void deny_msr(u8 *bitmap, u32 msr)
 {
 	u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
 
@@ -142,26 +142,26 @@ struct kvm_msr_filter no_filter_deny = {
  * Note: Force test_rdmsr() to not be inlined to prevent the labels,
  * rdmsr_start and rdmsr_end, from being defined multiple times.
  */
-static noinline uint64_t test_rdmsr(uint32_t msr)
+static noinline u64 test_rdmsr(u32 msr)
 {
-	uint32_t a, d;
+	u32 a, d;
 
 	guest_exception_count = 0;
 
 	__asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
 			"=a"(a), "=d"(d) : "c"(msr) : "memory");
 
-	return a | ((uint64_t) d << 32);
+	return a | ((u64)d << 32);
 }
 
 /*
  * Note: Force test_wrmsr() to not be inlined to prevent the labels,
  * wrmsr_start and wrmsr_end, from being defined multiple times.
  */
-static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+static noinline void test_wrmsr(u32 msr, u64 value)
 {
-	uint32_t a = value;
-	uint32_t d = value >> 32;
+	u32 a = value;
+	u32 d = value >> 32;
 
 	guest_exception_count = 0;
 
@@ -176,26 +176,26 @@ extern char wrmsr_start, wrmsr_end;
  * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
  * rdmsr_start and rdmsr_end, from being defined multiple times.
  */
-static noinline uint64_t test_em_rdmsr(uint32_t msr)
+static noinline u64 test_em_rdmsr(u32 msr)
 {
-	uint32_t a, d;
+	u32 a, d;
 
 	guest_exception_count = 0;
 
 	__asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
 			"=a"(a), "=d"(d) : "c"(msr) : "memory");
 
-	return a | ((uint64_t) d << 32);
+	return a | ((u64)d << 32);
 }
 
 /*
  * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
  * wrmsr_start and wrmsr_end, from being defined multiple times.
  */
-static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+static noinline void test_em_wrmsr(u32 msr, u64 value)
 {
-	uint32_t a = value;
-	uint32_t d = value >> 32;
+	u32 a = value;
+	u32 d = value >> 32;
 
 	guest_exception_count = 0;
 
@@ -208,7 +208,7 @@ extern char em_wrmsr_start, em_wrmsr_end;
 
 static void guest_code_filter_allow(void)
 {
-	uint64_t data;
+	u64 data;
 
 	/*
 	 * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
@@ -328,7 +328,7 @@ static void guest_code_filter_deny(void)
 
 static void guest_code_permission_bitmap(void)
 {
-	uint64_t data;
+	u64 data;
 
 	data = test_rdmsr(MSR_FS_BASE);
 	GUEST_ASSERT(data == MSR_FS_BASE);
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
 	data = test_rdmsr(MSR_GS_BASE);
 	GUEST_ASSERT(data == MSR_GS_BASE);
 
+	/* Access the MSRs again to ensure KVM has disabled interception. */
+	data = test_rdmsr(MSR_FS_BASE);
+	GUEST_ASSERT(data != MSR_FS_BASE);
+	data = test_rdmsr(MSR_GS_BASE);
+	GUEST_ASSERT(data != MSR_GS_BASE);
+
 	GUEST_DONE();
 }
 
@@ -385,7 +391,7 @@ static void check_for_guest_assert(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void process_rdmsr(struct kvm_vcpu *vcpu, u32 msr_index)
 {
 	struct kvm_run *run = vcpu->run;
 
@@ -417,7 +423,7 @@ static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
 	}
 }
 
-static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void process_wrmsr(struct kvm_vcpu *vcpu, u32 msr_index)
 {
 	struct kvm_run *run = vcpu->run;
 
@@ -458,7 +464,7 @@ static void process_ucall_done(struct kvm_vcpu *vcpu)
 		    uc.cmd, UCALL_DONE);
 }
 
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+static u64 process_ucall(struct kvm_vcpu *vcpu)
 {
 	struct ucall uc = {};
 
@@ -483,20 +489,20 @@ static uint64_t process_ucall(struct kvm_vcpu *vcpu)
 }
 
 static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
-					 uint32_t msr_index)
+					 u32 msr_index)
 {
 	vcpu_run(vcpu);
 	process_rdmsr(vcpu, msr_index);
 }
 
 static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
-					 uint32_t msr_index)
+					 u32 msr_index)
 {
 	vcpu_run(vcpu);
 	process_wrmsr(vcpu, msr_index);
 }
 
-static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+static u64 run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
 {
 	vcpu_run(vcpu);
 	return process_ucall(vcpu);
@@ -513,7 +519,7 @@ KVM_ONE_VCPU_TEST_SUITE(user_msr);
 KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
 {
 	struct kvm_vm *vm = vcpu->vm;
-	uint64_t cmd;
+	u64 cmd;
 	int rc;
 
 	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
 		    "Expected ucall state to be UCALL_SYNC.");
 	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
 	run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+	vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
 	run_guest_then_process_ucall_done(vcpu);
 }
 
@@ -724,7 +732,7 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm)
 				.flags = KVM_MSR_FILTER_READ,
 				.nmsrs = 1,
 				.base = 0,
-				.bitmap = (uint8_t *)&deny_bits,
+				.bitmap = (u8 *)&deny_bits,
 			},
 		},
 	};
diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
index a81a24761aac..1720113eae79 100644
--- a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
@@ -38,7 +38,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-	uint32_t control;
+	u32 control;
 
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
 	GUEST_ASSERT(load_vmcs(vmx_pages));
@@ -72,7 +72,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
 int main(int argc, char *argv[])
 {
 	unsigned long apic_access_addr = ~0ul;
-	vm_vaddr_t vmx_pages_gva;
+	gva_t vmx_pages_gva;
 	unsigned long high_gpa;
 	struct vmx_pages *vmx;
 	bool done = false;
diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
new file mode 100644
index 000000000000..80a4fd1e5bbb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define GOOD_IPI_VECTOR 0xe0
+#define BAD_IPI_VECTOR 0xf0
+
+static volatile int good_ipis_received;
+
+static void good_ipi_handler(struct ex_regs *regs)
+{
+	good_ipis_received++;
+}
+
+static void bad_ipi_handler(struct ex_regs *regs)
+{
+	GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
+}
+
+static void l2_guest_code(void)
+{
+	x2apic_enable();
+	vmcall();
+
+	xapic_enable();
+	xapic_write_reg(APIC_ID, 1 << 24);
+	vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u32 control;
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* Prepare the VMCS for L2 execution. */
+	prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+	/* Modify APIC ID to coerce KVM into inhibiting APICv. */
+	xapic_enable();
+	xapic_write_reg(APIC_ID, 1 << 24);
+
+	/*
+	 * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
+	 * but not SVI.  APICv should be inhibited due to running with a
+	 * modified APIC ID.
+	 */
+	xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+	GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);
+
+	/* Enable IRQs and verify the IRQ was received. */
+	sti_nop();
+	GUEST_ASSERT_EQ(good_ipis_received, 1);
+
+	/*
+	 * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
+	 * as KVM should force the APIC ID back to its default.
+	 */
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+	GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);
+
+	/*
+	 * Scribble the APIC access page to verify KVM disabled xAPIC
+	 * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
+	 * when L2 switches back to accelerated xAPIC mode.
+	 */
+	xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
+	xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);
+
+	/*
+	 * Verify the IRQ is still in-service and emit an EOI to verify KVM
+	 * propagates the highest vISR vector to SVI when APICv is activated
+	 * (and does so even if APICv was uninhibited while L2 was active).
+	 */
+	GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+			BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+	x2apic_write_reg(APIC_EOI, 0);
+	GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+
+	/*
+	 * Run L2 one more time to switch back to xAPIC mode to verify that KVM
+	 * handles the x2APIC => xAPIC transition and inhibits APICv while L2
+	 * is active.
+	 */
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));
+
+	xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+	/* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
+	sti_nop();
+	GUEST_ASSERT_EQ(good_ipis_received, 2);
+
+	GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+			BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+	xapic_write_reg(APIC_EOI, 0);
+	GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	gva_t vmx_pages_gva;
+	struct vmx_pages *vmx;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	prepare_virtualize_apic_accesses(vmx, vm);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+	vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
+	vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		/* NOT REACHED */
+	case UCALL_DONE:
+		break;
+	default:
+		TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+	}
+
+	/*
+	 * Verify at least two IRQs were injected.  Unfortunately, KVM counts
+	 * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
+	 * so being more precise isn't possible given the current stats.
+	 */
+	TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
+		    "Wanted at least 2 IRQ injections, got %lu\n",
+		    vcpu_get_stat(vcpu, irq_injections));
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
deleted file mode 100644
index fa512d033205..000000000000
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX		1
-#define TEST_MEM_PAGES			3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM			0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1		0xc0001000
-#define NESTED_TEST_MEM2		0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
-	READ_ONCE(*a);
-	WRITE_ONCE(*a, 1);
-	GUEST_SYNC(true);
-	GUEST_SYNC(false);
-
-	WRITE_ONCE(*b, 1);
-	GUEST_SYNC(true);
-	WRITE_ONCE(*b, 1);
-	GUEST_SYNC(true);
-	GUEST_SYNC(false);
-
-	/* Exit to L1 and never come back.  */
-	vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
-	l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
-	/* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
-	l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
-	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-	void *l2_rip;
-
-	GUEST_ASSERT(vmx->vmcs_gpa);
-	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
-	GUEST_ASSERT(load_vmcs(vmx));
-
-	if (vmx->eptp_gpa)
-		l2_rip = l2_guest_code_ept_enabled;
-	else
-		l2_rip = l2_guest_code_ept_disabled;
-
-	prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
-	GUEST_SYNC(false);
-	GUEST_ASSERT(!vmlaunch());
-	GUEST_SYNC(false);
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-	GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
-	vm_vaddr_t vmx_pages_gva = 0;
-	struct vmx_pages *vmx;
-	unsigned long *bmap;
-	uint64_t *host_test_mem;
-
-	struct kvm_vcpu *vcpu;
-	struct kvm_vm *vm;
-	struct ucall uc;
-	bool done = false;
-
-	pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
-	/* Create VM */
-	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
-	/* Add an extra memory slot for testing dirty logging */
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    GUEST_TEST_MEM,
-				    TEST_MEM_SLOT_INDEX,
-				    TEST_MEM_PAGES,
-				    KVM_MEM_LOG_DIRTY_PAGES);
-
-	/*
-	 * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
-	 * affects both L1 and L2.  However...
-	 */
-	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
-	/*
-	 * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
-	 * 0xc0000000.
-	 *
-	 * Note that prepare_eptp should be called only L1's GPA map is done,
-	 * meaning after the last call to virt_map.
-	 *
-	 * When EPT is disabled, the L2 guest code will still access the same L1
-	 * GPAs as the EPT enabled case.
-	 */
-	if (enable_ept) {
-		prepare_eptp(vmx, vm, 0);
-		nested_map_memslot(vmx, vm, 0);
-		nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
-		nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
-	}
-
-	bmap = bitmap_zalloc(TEST_MEM_PAGES);
-	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
-	while (!done) {
-		memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
-		vcpu_run(vcpu);
-		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
-		switch (get_ucall(vcpu, &uc)) {
-		case UCALL_ABORT:
-			REPORT_GUEST_ASSERT(uc);
-			/* NOT REACHED */
-		case UCALL_SYNC:
-			/*
-			 * The nested guest wrote at offset 0x1000 in the memslot, but the
-			 * dirty bitmap must be filled in according to L1 GPA, not L2.
-			 */
-			kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-			if (uc.args[1]) {
-				TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
-				TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
-			} else {
-				TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
-				TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
-			}
-
-			TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
-			TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
-			TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
-			TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
-			break;
-		case UCALL_DONE:
-			done = true;
-			break;
-		default:
-			TEST_FAIL("Unknown ucall %lu", uc.cmd);
-		}
-	}
-}
-
-int main(int argc, char *argv[])
-{
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
-	test_vmx_dirty_log(/*enable_ept=*/false);
-
-	if (kvm_cpu_has_ept())
-		test_vmx_dirty_log(/*enable_ept=*/true);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..2cae86d9d5e2 100644
--- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -110,7 +110,7 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 
 	TEST_REQUIRE(host_cpu_is_intel);
-	TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+	TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	get_set_sigalrm_vcpu(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
index a100ee5f0009..a2eaceed9ad5 100644
--- a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
@@ -52,7 +52,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva;
+	gva_t vmx_pages_gva;
 	struct kvm_sregs sregs;
 	struct kvm_vcpu *vcpu;
 	struct kvm_run *run;
diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
index 90720b6205f4..c1e8632a1bb6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
@@ -12,11 +12,10 @@
 #include "kvm_util.h"
 #include "vmx.h"
 
-static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-				  uint64_t mask)
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask)
 {
-	uint64_t val = vcpu_get_msr(vcpu, msr_index);
-	uint64_t bit;
+	u64 val = vcpu_get_msr(vcpu, msr_index);
+	u64 bit;
 
 	mask &= val;
 
@@ -26,11 +25,10 @@ static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
 	}
 }
 
-static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
-				uint64_t mask)
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask)
 {
-	uint64_t val = vcpu_get_msr(vcpu, msr_index);
-	uint64_t bit;
+	u64 val = vcpu_get_msr(vcpu, msr_index);
+	u64 bit;
 
 	mask = ~mask | val;
 
@@ -40,7 +38,7 @@ static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
 	}
 }
 
-static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index)
 {
 	vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
 	vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
@@ -68,10 +66,10 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
 }
 
 static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
-					    uint64_t msr_bit,
+					    u64 msr_bit,
 					    struct kvm_x86_cpu_feature feature)
 {
-	uint64_t val;
+	u64 val;
 
 	vcpu_clear_cpuid_feature(vcpu, feature);
 
@@ -90,7 +88,7 @@ static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
 
 static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
 {
-	uint64_t supported_bits = FEAT_CTL_LOCKED |
+	u64 supported_bits = FEAT_CTL_LOCKED |
 				  FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
 				  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
 				  FEAT_CTL_SGX_LC_ENABLED |
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..f13dee317383
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+	/*
+	 * Sync with L0 to trigger save/restore.  After
+	 * resuming, execute VMCALL to exit back to L1.
+	 */
+	GUEST_SYNC(1);
+	vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u64 guest_cr4;
+	gpa_t pml5_pa, pml4_pa;
+	u64 *pml5;
+	u64 exit_reason;
+
+	/* Set GS_BASE to a value that is only canonical with LA57. */
+	wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+	GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/*
+	 * Set up L2 with a 4-level page table by pointing its CR3 to
+	 * L1's first PML4 table and clearing CR4.LA57. This creates
+	 * the CR4.LA57 mismatch that exercises the bug.
+	 */
+	pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+	pml5 = (u64 *)pml5_pa;
+	pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+	vmwrite(GUEST_CR3, pml4_pa);
+
+	guest_cr4 = vmreadz(GUEST_CR4);
+	guest_cr4 &= ~X86_CR4_LA57;
+	vmwrite(GUEST_CR4, guest_cr4);
+
+	GUEST_ASSERT(!vmlaunch());
+
+	exit_reason = vmreadz(VM_EXIT_REASON);
+	GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	l1_guest_code(vmx_pages);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	gva_t vmx_pages_gva = 0;
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/*
+	 * L1 needs to read its own PML5 table to set up L2. Identity map
+	 * the PML5 table to facilitate this.
+	 */
+	virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1);
+
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+	for (stage = 1;; stage++) {
+		vcpu_run(vcpu);
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		TEST_ASSERT(uc.args[1] == stage,
+			    "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+		if (stage == 1) {
+			pr_info("L2 is active; performing save/restore.\n");
+			state = vcpu_save_state(vcpu);
+
+			kvm_vm_release(vm);
+
+			/* Restore state in a new VM. */
+			vcpu = vm_recreate_with_one_vcpu(vm);
+			vcpu_load_state(vcpu, state);
+			kvm_x86_state_cleanup(state);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..d004108dbdc6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
@@ -29,7 +29,7 @@ static union perf_capabilities {
 		u64 pebs_baseline:1;
 		u64	perf_metrics:1;
 		u64	pebs_output_pt_available:1;
-		u64	anythread_deprecated:1;
+		u64	pebs_timing_info:1;
 	};
 	u64	capabilities;
 } host_cap;
@@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = {
 	.pebs_arch_reg = 1,
 	.pebs_format = -1,
 	.pebs_baseline = 1,
+	.pebs_timing_info = 1,
 };
 
 static const union perf_capabilities format_caps = {
@@ -51,16 +52,16 @@ static const union perf_capabilities format_caps = {
 	.pebs_format = -1,
 };
 
-static void guest_test_perf_capabilities_gp(uint64_t val)
+static void guest_test_perf_capabilities_gp(u64 val)
 {
-	uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+	u8 vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
 
 	__GUEST_ASSERT(vector == GP_VECTOR,
-		       "Expected #GP for value '0x%lx', got vector '0x%x'",
-		       val, vector);
+		       "Expected #GP for value '0x%lx', got %s",
+		       val, ex_str(vector));
 }
 
-static void guest_code(uint64_t current_val)
+static void guest_code(u64 current_val)
 {
 	int i;
 
@@ -128,7 +129,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
 
 KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
 {
-	const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+	const u64 fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
 	int bit;
 
 	for_each_set_bit(bit, &fungible_caps, 64) {
@@ -147,7 +148,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
  */
 KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
 {
-	const uint64_t reserved_caps = (~host_cap.capabilities |
+	const u64 reserved_caps = (~host_cap.capabilities |
 					immutable_caps.capabilities) &
 				       ~format_caps.capabilities;
 	union perf_capabilities val = host_cap;
@@ -209,7 +210,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
 
 KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
 {
-	uint64_t val;
+	u64 val;
 	int i, r;
 
 	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
index 00dd2ac07a61..1b7b6ba23de7 100644
--- a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
@@ -152,7 +152,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	gva_t vmx_pages_gva = 0;
 
 	struct kvm_regs regs1, regs2;
 	struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..39ce9a9369f5 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -48,20 +48,20 @@
  * Incremented in the IPI handler. Provides evidence to the sender that the IPI
  * arrived at the destination
  */
-static volatile uint64_t ipis_rcvd;
+static volatile u64 ipis_rcvd;
 
 /* Data struct shared between host main thread and vCPUs */
 struct test_data_page {
-	uint32_t halter_apic_id;
-	volatile uint64_t hlt_count;
-	volatile uint64_t wake_count;
-	uint64_t ipis_sent;
-	uint64_t migrations_attempted;
-	uint64_t migrations_completed;
-	uint32_t icr;
-	uint32_t icr2;
-	uint32_t halter_tpr;
-	uint32_t halter_ppr;
+	u32 halter_apic_id;
+	volatile u64 hlt_count;
+	volatile u64 wake_count;
+	u64 ipis_sent;
+	u64 migrations_attempted;
+	u64 migrations_completed;
+	u32 icr;
+	u32 icr2;
+	u32 halter_tpr;
+	u32 halter_ppr;
 
 	/*
 	 *  Record local version register as a cross-check that APIC access
@@ -69,19 +69,19 @@ struct test_data_page {
 	 *  arch/x86/kvm/lapic.c). If test is failing, check that values match
 	 *  to determine whether APIC access exits are working.
 	 */
-	uint32_t halter_lvr;
+	u32 halter_lvr;
 };
 
 struct thread_params {
 	struct test_data_page *data;
 	struct kvm_vcpu *vcpu;
-	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+	u64 *pipis_rcvd; /* host address of ipis_rcvd global */
 };
 
 void verify_apic_base_addr(void)
 {
-	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
-	uint64_t base = GET_APIC_BASE(msr);
+	u64 msr = rdmsr(MSR_IA32_APICBASE);
+	u64 base = GET_APIC_BASE(msr);
 
 	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
 }
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
 		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
 		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
 		data->hlt_count++;
-		asm volatile("sti; hlt; cli");
+		safe_halt();
+		cli();
 		data->wake_count++;
 	}
 }
@@ -124,12 +125,12 @@ static void guest_ipi_handler(struct ex_regs *regs)
 
 static void sender_guest_code(struct test_data_page *data)
 {
-	uint64_t last_wake_count;
-	uint64_t last_hlt_count;
-	uint64_t last_ipis_rcvd_count;
-	uint32_t icr_val;
-	uint32_t icr2_val;
-	uint64_t tsc_start;
+	u64 last_wake_count;
+	u64 last_hlt_count;
+	u64 last_ipis_rcvd_count;
+	u32 icr_val;
+	u32 icr2_val;
+	u64 tsc_start;
 
 	verify_apic_base_addr();
 	xapic_enable();
@@ -247,7 +248,7 @@ static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
 }
 
 void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
-		   uint64_t *pipis_rcvd)
+		   u64 *pipis_rcvd)
 {
 	long pages_not_moved;
 	unsigned long nodemask = 0;
@@ -255,20 +256,19 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
 	int nodes = 0;
 	time_t start_time, last_update, now;
 	time_t interval_secs = 1;
-	int i, r;
+	int i;
 	int from, to;
 	unsigned long bit;
-	uint64_t hlt_count;
-	uint64_t wake_count;
-	uint64_t ipis_sent;
+	u64 hlt_count;
+	u64 wake_count;
+	u64 ipis_sent;
 
 	fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
 		delay_usecs);
 
 	/* Get set of first 64 numa nodes available */
-	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+	kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
 			  0, MPOL_F_MEMS_ALLOWED);
-	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
 
 	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
 		"(each 1-bit indicates node is present): %#lx\n",
@@ -393,12 +393,12 @@ int main(int argc, char *argv[])
 	int run_secs = 0;
 	int delay_usecs = 0;
 	struct test_data_page *data;
-	vm_vaddr_t test_data_page_vaddr;
+	gva_t test_data_page_gva;
 	bool migrate = false;
 	pthread_t threads[2];
 	struct thread_params params[2];
 	struct kvm_vm *vm;
-	uint64_t *pipis_rcvd;
+	u64 *pipis_rcvd;
 
 	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
 	if (run_secs <= 0)
@@ -414,16 +414,16 @@ int main(int argc, char *argv[])
 
 	params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
 
-	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
-	data = addr_gva2hva(vm, test_data_page_vaddr);
+	test_data_page_gva = vm_alloc_page(vm);
+	data = addr_gva2hva(vm, test_data_page_gva);
 	memset(data, 0, sizeof(*data));
 	params[0].data = data;
 	params[1].data = data;
 
-	vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
-	vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+	vcpu_args_set(params[0].vcpu, 1, test_data_page_gva);
+	vcpu_args_set(params[1].vcpu, 1, test_data_page_gva);
 
-	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+	pipis_rcvd = (u64 *)addr_gva2hva(vm, (u64)&ipis_rcvd);
 	params[0].pipis_rcvd = pipis_rcvd;
 	params[1].pipis_rcvd = pipis_rcvd;
 
@@ -465,6 +465,19 @@ int main(int argc, char *argv[])
 	cancel_join_vcpu_thread(threads[0], params[0].vcpu);
 	cancel_join_vcpu_thread(threads[1], params[1].vcpu);
 
+	/*
+	 * If the host supports Idle HLT, i.e. KVM *might* be using Idle HLT,
+	 * then the number of HLT exits may be less than the number of HLTs
+	 * that were executed, as Idle HLT elides the exit if the vCPU has an
+	 * unmasked, pending IRQ (or NMI).
+	 */
+	if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+		TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+			    "HLT insns = %lu, HLT exits = %lu",
+			    data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+	else
+		TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
 	fprintf(stderr,
 		"Test successful after running for %d seconds.\n"
 		"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..637bb90c1d93 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,12 +18,12 @@ struct xapic_vcpu {
 
 static void xapic_guest_code(void)
 {
-	asm volatile("cli");
+	cli();
 
 	xapic_enable();
 
 	while (1) {
-		uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+		u64 val = (u64)xapic_read_reg(APIC_IRR) |
 			       (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
 
 		xapic_write_reg(APIC_ICR2, val >> 32);
@@ -38,12 +38,12 @@ static void xapic_guest_code(void)
 
 static void x2apic_guest_code(void)
 {
-	asm volatile("cli");
+	cli();
 
 	x2apic_enable();
 
 	do {
-		uint64_t val = x2apic_read_reg(APIC_IRR) |
+		u64 val = x2apic_read_reg(APIC_IRR) |
 			       x2apic_read_reg(APIC_IRR + 0x10) << 32;
 
 		if (val & X2APIC_RSVD_BITS_MASK) {
@@ -56,12 +56,12 @@ static void x2apic_guest_code(void)
 	} while (1);
 }
 
-static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+static void ____test_icr(struct xapic_vcpu *x, u64 val)
 {
 	struct kvm_vcpu *vcpu = x->vcpu;
 	struct kvm_lapic_state xapic;
 	struct ucall uc;
-	uint64_t icr;
+	u64 icr;
 
 	/*
 	 * Tell the guest what ICR value to write.  Use the IRR to pass info,
@@ -93,7 +93,7 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
 		TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
 }
 
-static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+static void __test_icr(struct xapic_vcpu *x, u64 val)
 {
 	/*
 	 * The BUSY bit is reserved on both AMD and Intel, but only AMD treats
@@ -109,7 +109,7 @@ static void __test_icr(struct xapic_vcpu *x, uint64_t val)
 static void test_icr(struct xapic_vcpu *x)
 {
 	struct kvm_vcpu *vcpu = x->vcpu;
-	uint64_t icr, i, j;
+	u64 icr, i, j;
 
 	icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
 	for (i = 0; i <= 0xff; i++)
@@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x)
 		__test_icr(x, icr | i);
 
 	/*
-	 * Send all flavors of IPIs to non-existent vCPUs.  TODO: use number of
-	 * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
+	 * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use
+	 * vector 0xff.
 	 */
 	icr = APIC_INT_ASSERT | 0xff;
 	for (i = 0; i < 0xff; i++) {
@@ -142,9 +142,9 @@ static void test_icr(struct xapic_vcpu *x)
 	__test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
 }
 
-static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base)
 {
-	uint32_t apic_id, expected;
+	u32 apic_id, expected;
 	struct kvm_lapic_state xapic;
 
 	vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
@@ -170,9 +170,9 @@ static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
  */
 static void test_apic_id(void)
 {
-	const uint32_t NR_VCPUS = 3;
+	const u32 NR_VCPUS = 3;
 	struct kvm_vcpu *vcpus[NR_VCPUS];
-	uint64_t apic_base;
+	u64 apic_base;
 	struct kvm_vm *vm;
 	int i;
 
@@ -248,7 +248,7 @@ int main(int argc, char *argv[])
 	 * drops writes, AMD does not).  Account for the errata when checking
 	 * that KVM reads back what was written.
 	 */
-	x.has_xavic_errata = host_cpu_is_amd &&
+	x.has_xavic_errata = host_cpu_is_amd_compatible &&
 			     get_kvm_amd_param_bool("avic");
 
 	vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
new file mode 100644
index 000000000000..ab25db2235d5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+static bool is_x2apic;
+
+#define IRQ_VECTOR 0x20
+
+/* See also the comment at the similar assertion in memslot_perf_test.c */
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static atomic_uint tpr_guest_irq_sync_val;
+
+static void tpr_guest_irq_sync_flag_reset(void)
+{
+	atomic_store_explicit(&tpr_guest_irq_sync_val, 0,
+			      memory_order_release);
+}
+
+static unsigned int tpr_guest_irq_sync_val_get(void)
+{
+	return atomic_load_explicit(&tpr_guest_irq_sync_val,
+				    memory_order_acquire);
+}
+
+static void tpr_guest_irq_sync_val_inc(void)
+{
+	atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1,
+				  memory_order_acq_rel);
+}
+
+static void tpr_guest_irq_handler_xapic(struct ex_regs *regs)
+{
+	tpr_guest_irq_sync_val_inc();
+
+	xapic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs)
+{
+	tpr_guest_irq_sync_val_inc();
+
+	x2apic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_queue(void)
+{
+	if (is_x2apic) {
+		x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR);
+	} else {
+		u32 icr, icr2;
+
+		icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED |
+			IRQ_VECTOR;
+		icr2 = 0;
+
+		xapic_write_reg(APIC_ICR2, icr2);
+		xapic_write_reg(APIC_ICR, icr);
+	}
+}
+
+static u8 tpr_guest_tpr_get(void)
+{
+	u32 taskpri;
+
+	if (is_x2apic)
+		taskpri = x2apic_read_reg(APIC_TASKPRI);
+	else
+		taskpri = xapic_read_reg(APIC_TASKPRI);
+
+	return GET_APIC_PRI(taskpri);
+}
+
+static u8 tpr_guest_ppr_get(void)
+{
+	u32 procpri;
+
+	if (is_x2apic)
+		procpri = x2apic_read_reg(APIC_PROCPRI);
+	else
+		procpri = xapic_read_reg(APIC_PROCPRI);
+
+	return GET_APIC_PRI(procpri);
+}
+
+static u8 tpr_guest_cr8_get(void)
+{
+	u64 cr8;
+
+	asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8));
+
+	return cr8 & GENMASK(3, 0);
+}
+
+static void tpr_guest_check_tpr_ppr_cr8_equal(void)
+{
+	u8 tpr;
+
+	tpr = tpr_guest_tpr_get();
+
+	GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr);
+	GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr);
+}
+
+static void tpr_guest_code(void)
+{
+	cli();
+
+	if (is_x2apic)
+		x2apic_enable();
+	else
+		xapic_enable();
+
+	GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0);
+	tpr_guest_check_tpr_ppr_cr8_equal();
+
+	tpr_guest_irq_queue();
+
+	/* TPR = 0 but IRQ masked by IF=0, should not fire */
+	udelay(1000);
+	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0);
+
+	sti();
+
+	/* IF=1 now, IRQ should fire */
+	while (tpr_guest_irq_sync_val_get() == 0)
+		cpu_relax();
+	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+	GUEST_SYNC(true);
+	tpr_guest_check_tpr_ppr_cr8_equal();
+
+	tpr_guest_irq_queue();
+
+	/* IRQ masked by barely high enough TPR now, should not fire */
+	udelay(1000);
+	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+	GUEST_SYNC(false);
+	tpr_guest_check_tpr_ppr_cr8_equal();
+
+	/* TPR barely low enough now to unmask IRQ, should fire */
+	while (tpr_guest_irq_sync_val_get() == 1)
+		cpu_relax();
+	GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2);
+
+	GUEST_DONE();
+}
+
+static u8 lapic_tpr_get(struct kvm_lapic_state *xapic)
+{
+	return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI]));
+}
+
+static void lapic_tpr_set(struct kvm_lapic_state *xapic, u8 val)
+{
+	u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI];
+
+	*taskpri = SET_APIC_PRI(*taskpri, val);
+}
+
+static u8 sregs_tpr(struct kvm_sregs *sregs)
+{
+	return sregs->cr8 & GENMASK(3, 0);
+}
+
+static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic_state xapic;
+
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+	TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0);
+}
+
+static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_sregs sregs;
+	struct kvm_lapic_state xapic;
+
+	vcpu_sregs_get(vcpu, &sregs);
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+	TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic));
+}
+
+static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask)
+{
+	struct kvm_lapic_state xapic;
+	u8 tpr;
+
+	static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number");
+	tpr = IRQ_VECTOR / 16;
+	if (!mask)
+		tpr--;
+
+	vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+	lapic_tpr_set(&xapic, tpr);
+	vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+}
+
+static void test_tpr(bool __is_x2apic)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	bool done = false;
+
+	is_x2apic = __is_x2apic;
+
+	vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code);
+	if (is_x2apic) {
+		vm_install_exception_handler(vm, IRQ_VECTOR,
+					     tpr_guest_irq_handler_x2apic);
+	} else {
+		vm_install_exception_handler(vm, IRQ_VECTOR,
+					     tpr_guest_irq_handler_xapic);
+		vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC);
+		virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+	}
+
+	sync_global_to_guest(vcpu->vm, is_x2apic);
+
+	/* According to the SDM/APM the TPR value at reset is 0 */
+	test_tpr_check_tpr_zero(vcpu);
+	test_tpr_check_tpr_cr8_equal(vcpu);
+
+	tpr_guest_irq_sync_flag_reset();
+	sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val);
+
+	while (!done) {
+		struct ucall uc;
+
+		alarm(2);
+		vcpu_run(vcpu);
+		alarm(0);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		case UCALL_DONE:
+			test_tpr_check_tpr_cr8_equal(vcpu);
+			done = true;
+			break;
+		case UCALL_SYNC:
+			test_tpr_check_tpr_cr8_equal(vcpu);
+			test_tpr_set_tpr_for_irq(vcpu, uc.args[1]);
+			break;
+		default:
+			TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd);
+			break;
+		}
+	}
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	/*
+	 * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can
+	 * be fully hidden from the guest.  KVM disallows changing CPUID after
+	 * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+	 */
+	test_tpr(false);
+	test_tpr(true);
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..40dc9e6b3fad 100644
--- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
@@ -21,7 +21,7 @@
  */
 #define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies)		\
 do {											\
-	uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies));	\
+	u64 __supported = (supported_xcr0) & ((xfeatures) | (dependencies));	\
 											\
 	__GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) ||			\
 		       __supported == ((xfeatures) | (dependencies)),			\
@@ -39,7 +39,7 @@ do {											\
  */
 #define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures)		\
 do {									\
-	uint64_t __supported = (supported_xcr0) & (xfeatures);		\
+	u64 __supported = (supported_xcr0) & (xfeatures);		\
 									\
 	__GUEST_ASSERT(!__supported || __supported == (xfeatures),	\
 		       "supported = 0x%lx, xfeatures = 0x%llx",		\
@@ -48,8 +48,8 @@ do {									\
 
 static void guest_code(void)
 {
-	uint64_t initial_xcr0;
-	uint64_t supported_xcr0;
+	u64 initial_xcr0;
+	u64 supported_xcr0;
 	int i, vector;
 
 	set_cr4(get_cr4() | X86_CR4_OSXSAVE);
@@ -81,13 +81,13 @@ static void guest_code(void)
 
 	vector = xsetbv_safe(0, XFEATURE_MASK_FP);
 	__GUEST_ASSERT(!vector,
-		       "Expected success on XSETBV(FP), got vector '0x%x'",
-		       vector);
+		       "Expected success on XSETBV(FP), got %s",
+		       ex_str(vector));
 
 	vector = xsetbv_safe(0, supported_xcr0);
 	__GUEST_ASSERT(!vector,
-		       "Expected success on XSETBV(0x%lx), got vector '0x%x'",
-		       supported_xcr0, vector);
+		       "Expected success on XSETBV(0x%lx), got %s",
+		       supported_xcr0, ex_str(vector));
 
 	for (i = 0; i < 64; i++) {
 		if (supported_xcr0 & BIT_ULL(i))
@@ -95,8 +95,8 @@ static void guest_code(void)
 
 		vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
 		__GUEST_ASSERT(vector == GP_VECTOR,
-			       "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
-			       BIT_ULL(i), supported_xcr0, vector);
+			       "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s",
+			       BIT_ULL(i), supported_xcr0, ex_str(vector));
 	}
 
 	GUEST_DONE();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..5076f6a75455 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -116,15 +116,15 @@ struct pvclock_wall_clock {
 } __attribute__((__packed__));
 
 struct vcpu_runstate_info {
-	uint32_t state;
-	uint64_t state_entry_time;
-	uint64_t time[5]; /* Extra field for overrun check */
+	u32 state;
+	u64 state_entry_time;
+	u64 time[5]; /* Extra field for overrun check */
 };
 
 struct compat_vcpu_runstate_info {
-	uint32_t state;
-	uint64_t state_entry_time;
-	uint64_t time[5];
+	u32 state;
+	u64 state_entry_time;
+	u64 time[5];
 } __attribute__((__packed__));
 
 struct arch_vcpu_info {
@@ -133,8 +133,8 @@ struct arch_vcpu_info {
 };
 
 struct vcpu_info {
-	uint8_t evtchn_upcall_pending;
-	uint8_t evtchn_upcall_mask;
+	u8 evtchn_upcall_pending;
+	u8 evtchn_upcall_mask;
 	unsigned long evtchn_pending_sel;
 	struct arch_vcpu_info arch;
 	struct pvclock_vcpu_time_info time;
@@ -145,7 +145,7 @@ struct shared_info {
 	unsigned long evtchn_pending[64];
 	unsigned long evtchn_mask[64];
 	struct pvclock_wall_clock wc;
-	uint32_t wc_sec_hi;
+	u32 wc_sec_hi;
 	/* arch_shared_info here */
 };
 
@@ -191,10 +191,7 @@ static void guest_code(void)
 	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
 	int i;
 
-	__asm__ __volatile__(
-		"sti\n"
-		"nop\n"
-	);
+	sti_nop();
 
 	/* Trigger an interrupt injection */
 	GUEST_SYNC(TEST_INJECT_VECTOR);
@@ -550,15 +547,9 @@ int main(int argc, char *argv[])
 	int irq_fd[2] = { -1, -1 };
 
 	if (do_eventfd_tests) {
-		irq_fd[0] = eventfd(0, 0);
-		irq_fd[1] = eventfd(0, 0);
+		irq_fd[0] = kvm_new_eventfd();
+		irq_fd[1] = kvm_new_eventfd();
 
-		/* Unexpected, but not a KVM failure */
-		if (irq_fd[0] == -1 || irq_fd[1] == -1)
-			do_evtchn_tests = do_eventfd_tests = false;
-	}
-
-	if (do_eventfd_tests) {
 		irq_routes.info.nr = 2;
 
 		irq_routes.entries[0].gsi = 32;
@@ -575,15 +566,8 @@ int main(int argc, char *argv[])
 
 		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
 
-		struct kvm_irqfd ifd = { };
-
-		ifd.fd = irq_fd[0];
-		ifd.gsi = 32;
-		vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-		ifd.fd = irq_fd[1];
-		ifd.gsi = 33;
-		vm_ioctl(vm, KVM_IRQFD, &ifd);
+		kvm_assign_irqfd(vm, 32, irq_fd[0]);
+		kvm_assign_irqfd(vm, 33, irq_fd[1]);
 
 		struct sigaction sa = { };
 		sa.sa_handler = handle_alrm;
@@ -674,7 +658,7 @@ int main(int argc, char *argv[])
 					printf("Testing RUNSTATE_ADJUST\n");
 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
 				memset(&rst.u, 0, sizeof(rst.u));
-				rst.u.runstate.state = (uint64_t)-1;
+				rst.u.runstate.state = (u64)-1;
 				rst.u.runstate.time_blocked =
 					0x5a - rs->time[RUNSTATE_blocked];
 				rst.u.runstate.time_offline =
@@ -1129,7 +1113,7 @@ int main(int argc, char *argv[])
 			/* Don't change the address, just trigger a write */
 			struct kvm_xen_vcpu_attr adj = {
 				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
-				.u.runstate.state = (uint64_t)-1
+				.u.runstate.state = (u64)-1
 			};
 			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
 
diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
index f331a4e9bae3..12c63df6bbce 100644
--- a/tools/testing/selftests/kvm/x86/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c
@@ -17,7 +17,7 @@ int main(int argc, char *argv[])
 	bool xss_in_msr_list;
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu;
-	uint64_t xss_val;
+	u64 xss_val;
 	int i, r;
 
 	/* Create VM */