summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/kvm')
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm120
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c279
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c7
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c81
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c156
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c16
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c415
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c305
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/idreg-idst.c117
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c159
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c175
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic.c297
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c8
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c331
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c179
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c17
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c261
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c303
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_v5.c228
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c77
-rw-r--r--tools/testing/selftests/kvm/config1
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c35
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c4
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c9
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c431
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h24
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v5.h150
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h165
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h3
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h82
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h207
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_types.h2
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h8
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/pmu.h66
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h235
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h51
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h26
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h37
-rw-r--r--tools/testing/selftests/kvm/include/s390/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h20
-rw-r--r--tools/testing/selftests/kvm/include/x86/apic.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86/kvm_util_arch.h22
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h26
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h149
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h53
-rw-r--r--tools/testing/selftests/kvm/include/x86/smm.h17
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm.h17
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm_util.h9
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h17
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c143
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c4
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c3
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c6
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h1
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c22
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c19
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c224
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c64
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c41
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c399
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S65
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c402
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/lru_gen_util.c387
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c6
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S139
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c108
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c21
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c4
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c49
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c65
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c49
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c384
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c76
-rw-r--r--tools/testing/selftests/kvm/lib/x86/svm.c27
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c254
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/loongarch/pmu_test.c205
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c1
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c15
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c161
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c216
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c44
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c32
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/irq_routing.c75
-rw-r--r--tools/testing/selftests/kvm/s390/keyop.c299
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c16
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c28
-rw-r--r--tools/testing/selftests/kvm/steal_time.c196
-rw-r--r--tools/testing/selftests/kvm/x86/amx_test.c144
-rw-r--r--tools/testing/selftests/kvm/x86/aperfmperf_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86/cpuid_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c150
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c209
-rw-r--r--tools/testing/selftests/kvm/x86/fix_hypercall_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c23
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c4
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_buslock_test.c135
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c115
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c)42
-rw-r--r--tools/testing/selftests/kvm/x86/nested_dirty_log_test.c293
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_set_state_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c)128
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c)79
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c)48
-rw-r--r--tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c197
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c75
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c13
-rw-r--r--tools/testing/selftests/kvm/x86/sev_migrate_tests.c2
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c107
-rw-r--r--tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/smm_test.c27
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c37
-rw-r--r--tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c145
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c55
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c176
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c155
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c179
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_tpr_test.c276
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c21
156 files changed, 12303 insertions, 2185 deletions
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 20af35a91d6f..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -3,10 +3,10 @@ top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
- ARCH := x86
+ override ARCH := x86
endif
include Makefile.kvm
else
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f773f8f99249..9118a5a51b89 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -8,6 +8,7 @@ LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
LIBKVM += lib/memstress.c
LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
@@ -47,15 +48,34 @@ LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+TEST_GEN_PROGS_COMMON += memslot_modification_stress_test
+TEST_GEN_PROGS_COMMON += memslot_perf_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
@@ -68,9 +88,18 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
+TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_set_state_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -84,25 +113,26 @@ TEST_GEN_PROGS_x86 += x86/state_test
TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_clear_efer_svme
TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_vmcb12_gpa
+TEST_GEN_PROGS_x86 += x86/svm_lbr_nested_state
TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
TEST_GEN_PROGS_x86 += x86/sync_regs_test
TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xapic_tpr_test
TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
TEST_GEN_PROGS_x86 += x86/xss_msr_test
TEST_GEN_PROGS_x86 += x86/debug_regs
@@ -117,63 +147,56 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
-TEST_GEN_PROGS_x86 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hello_el2
+TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
-TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
TEST_GEN_PROGS_arm64 += arm64/vgic_init
TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vgic_v5
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
-TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/no-vgic
+TEST_GEN_PROGS_arm64 += arm64/idreg-idst
+TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
-TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
-TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += guest_memfd_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -182,29 +205,40 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
+TEST_GEN_PROGS_s390 += s390/keyop
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_s390 += s390/irq_routing
+TEST_GEN_PROGS_s390 += mmu_stress_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += access_tracking_perf_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += dirty_log_perf_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += mmu_stress_test
+TEST_GEN_PROGS_riscv += rseq_test
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch = loongarch/pmu_test
+TEST_GEN_PROGS_loongarch += arch_timer
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+TEST_GEN_PROGS_loongarch += steal_time
+
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
@@ -219,6 +253,7 @@ OVERRIDE_TARGETS = 1
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -226,6 +261,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
@@ -272,7 +308,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 447e619cf856..b058f27b2141 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -7,9 +7,11 @@
* This test measures the performance effects of KVM's access tracking.
* Access tracking is driven by the MMU notifiers test_young, clear_young, and
* clear_flush_young. These notifiers do not have a direct userspace API,
- * however the clear_young notifier can be triggered by marking a pages as idle
- * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
- * enable access tracking on guest memory.
+ * however the clear_young notifier can be triggered either by
+ * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
*
* To measure performance this test runs a VM with a configurable number of
* vCPUs that each touch every page in disjoint regions of memory. Performance
@@ -17,10 +19,11 @@
* predefined region.
*
* Note that a deterministic correctness test of access tracking is not possible
- * by using page_idle as it exists today. This is for a few reasons:
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
*
- * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
- * means subsequent guest accesses are not guaranteed to see page table
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
* updates made by KVM until some time in the future.
*
* 2. page_idle only operates on LRU pages. Newly allocated pages are not
@@ -47,10 +50,19 @@
#include "memstress.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
+
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
/* Global variable used to synchronize all of the vCPU threads. */
static int iteration;
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
/* Defines what vCPU threads should do during a given iteration. */
static enum {
/* Run the vCPU to access all its memory. */
@@ -65,6 +77,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
/* Whether to overlap the regions of memory vCPUs access. */
static bool overlap_memory_access;
+/*
+ * If the test should only warn if there are too many idle pages (i.e., it is
+ * expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
struct test_params {
/* The backing source for the region of memory. */
enum vm_mem_backing_src_type backing_src;
@@ -123,8 +154,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm,
- struct memstress_vcpu_args *vcpu_args)
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
{
int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t base_gva = vcpu_args->gva;
@@ -177,27 +224,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
- *
- * When running the guest as a nested VM, "warn" instead of asserting
- * as the TLB size is effectively unlimited and the KVM doesn't
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
- * are cached and the guest won't see the "idle" bit cleared.
*/
- if (still_idle >= pages / 10) {
-#ifdef __x86_64__
- TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
- "vCPU%d: Too many pages still idle (%lu out of %lu)",
- vcpu_idx, still_idle, pages);
-#endif
- printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
- "this will affect performance results.\n",
- vcpu_idx, still_idle, pages);
- }
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
close(page_idle_fd);
close(pagemap_fd);
}
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
{
struct ucall uc;
@@ -237,7 +336,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_args);
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
break;
}
@@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus,
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
/*
* Even though this parallelizes the work across vCPUs, this is still a
* very slow operation because page_idle forces the test to mark one pfn
- * at a time and the clear_young notifier serializes on the KVM MMU
+ * at a time and the clear_young notifier may serialize on the KVM MMU
* lock.
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, nr_vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
}
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (uint64_t)getpagesize());
+
memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
/* As a control, read and write to the populated memory first. */
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
@@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memstress_destroy_vm(vm);
}
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
static void help(char *name)
{
puts("");
@@ -342,11 +502,22 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
backing_src_help("-s");
puts("");
exit(0);
}
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
int main(int argc, char *argv[])
{
struct test_params params = {
@@ -354,12 +525,13 @@ int main(int argc, char *argv[])
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.nr_vcpus = 1,
};
+ char *new_cg = NULL;
int page_idle_fd;
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -376,6 +548,11 @@ int main(int argc, char *argv[])
case 's':
params.backing_src = parse_backing_src_type(optarg);
break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -383,12 +560,50 @@ int main(int argc, char *argv[])
}
}
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "CONFIG_IDLE_PAGE_TRACKING is not enabled");
- close(page_idle_fd);
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
- for_each_guest_mode(run_test, &params);
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR,
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index acb2cb596332..cf8fb67104f1 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void)
static int test_migrate_vcpu(unsigned int vcpu_idx)
{
int ret;
- cpu_set_t cpuset;
uint32_t new_pcpu = test_get_pcpu();
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
- ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
- sizeof(cpuset), &cpuset);
+ ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index cef8f7323ceb..713005b6f508 100644
--- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -146,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
index eeba1cc87ff8..d592a4515399 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
@@ -165,10 +165,8 @@ static void guest_code(void)
static void test_init_timer_irq(struct kvm_vm *vm)
{
/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
-static int gic_fd;
-
struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
int nr_vcpus = test_args.nr_vcpus;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
vm_init_descriptor_tables(vm);
@@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
@@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void)
void test_vm_cleanup(struct kvm_vm *vm)
{
- close(gic_fd);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..993c9e38e729 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -22,7 +22,8 @@
#include "gic.h"
#include "vgic.h"
-static const uint64_t CVAL_MAX = ~0ULL;
+/* Depends on counter width. */
+static uint64_t CVAL_MAX;
/* tval is a signed 32-bit int. */
static const int32_t TVAL_MAX = INT32_MAX;
static const int32_t TVAL_MIN = INT32_MIN;
@@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */
+static uint64_t DEF_CNT;
/* Number of runs. */
static const uint32_t NR_TEST_ITERS_DEF = 5;
@@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
}
static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
@@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer)
test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
wm);
}
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
- }
}
/*
@@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer)
GUEST_DONE();
}
+static cpu_set_t default_cpuset;
+
static uint32_t next_pcpu(void)
{
uint32_t max = get_nprocs();
uint32_t cur = sched_getcpu();
uint32_t next = cur;
- cpu_set_t cpuset;
+ cpu_set_t cpuset = default_cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
@@ -867,25 +862,6 @@ static uint32_t next_pcpu(void)
return next;
}
-static void migrate_self(uint32_t new_pcpu)
-{
- int ret;
- cpu_set_t cpuset;
- pthread_t thread;
-
- thread = pthread_self();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
- pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
- ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
- TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
- new_pcpu, ret);
-}
-
static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
enum arch_timer timer)
{
@@ -912,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
sched_yield();
break;
case USERSPACE_MIGRATE_SELF:
- migrate_self(next_pcpu());
+ pin_self_to_cpu(next_pcpu());
break;
default:
break;
@@ -924,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
struct ucall uc;
/* Start on CPU 0 */
- migrate_self(0);
+ pin_self_to_cpu(0);
while (true) {
vcpu_run(vcpu);
@@ -948,10 +924,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpu);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpu);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -973,8 +947,15 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
+
sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
}
static void test_print_help(char *name)
@@ -986,7 +967,7 @@ static void test_print_help(char *name)
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
- pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
@@ -1035,6 +1016,17 @@ static bool parse_args(int argc, char *argv[])
return false;
}
+static void set_counter_defaults(void)
+{
+ const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ uint64_t freq = read_sysreg(CNTFRQ_EL0);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -1043,19 +1035,24 @@ int main(int argc, char *argv[])
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
return 0;
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..ce5d312ef6ba
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+ "AT "#op": %sexpected fault (par: %lx)1", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, allowing KVM to
+ * potentially use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index c7fb55c9135b..1d431de8729c 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
/* Reset all bcr/bvr/wcr/wvr registers */
dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0);
for (i = 0; i <= brps; i++) {
write_dbgbcr(i, 0);
write_dbgbvr(i, 0);
}
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0);
for (i = 0; i <= wrps; i++) {
write_dbgwcr(i, 0);
write_dbgwvr(i, 0);
@@ -140,7 +140,7 @@ static void enable_os_lock(void)
static void enable_monitor_debug_exceptions(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
static void install_ss(void)
{
- uint32_t mdscr;
+ uint64_t mdscr;
asm volatile("msr daifclr, #8");
@@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt)
static int debug_version(uint64_t id_aa64dfr0)
{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+ return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0);
}
static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
@@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
int b, w, c;
/* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1;
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
/* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+ wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1;
/* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+ ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1;
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
brp_num, wrp_num, ctx_brp_num);
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..d8fe17a6cc59
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_sea_s1ptw_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
+
+ GUEST_DONE();
+}
+
+static noinline void test_s1ptw_abort_guest(void)
+{
+ extern char test_s1ptw_abort_insn;
+
+ WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
+
+ asm volatile("test_s1ptw_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("Load on S1PTW abort should not retire");
+}
+
+static void test_s1ptw_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ u64 *ptep, bad_pa;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
+ expect_sea_s1ptw_handler);
+
+ ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
+ bad_pa = BIT(vm->pa_bits) - vm->page_size;
+
+ *ptep &= ~GENMASK(47, 12);
+ *ptep |= bad_pa;
+
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+ test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d01798b6b3b4..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -15,6 +15,12 @@
#include "test_util.h"
#include "processor.h"
+#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
struct feature_id_reg {
__u64 reg;
__u64 id_reg;
@@ -22,37 +28,48 @@ struct feature_id_reg {
__u64 feat_min;
};
-static struct feature_id_reg feat_id_regs[] = {
- {
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 0,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- },
- {
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
+#define FEAT(id, f, v) \
+ .id_reg = SYS_REG(id), \
+ .feat_shift = id ## _ ## f ## _SHIFT, \
+ .feat_min = id ## _ ## f ## _ ## v
+
+#define REG_FEAT(r, id, f, v) \
+ { \
+ .reg = SYS_REG(r), \
+ FEAT(id, f, v) \
}
+
+static struct feature_id_reg feat_id_regs[] = {
+ REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP),
+ REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+ REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
};
bool filter_reg(__u64 reg)
@@ -333,9 +350,20 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -469,6 +497,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
@@ -686,6 +715,67 @@ static __u64 pauth_generic_regs[] = {
ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
};
+static __u64 el2_regs[] = {
+ SYS_REG(VPIDR_EL2),
+ SYS_REG(VMPIDR_EL2),
+ SYS_REG(SCTLR_EL2),
+ SYS_REG(ACTLR_EL2),
+ SYS_REG(SCTLR2_EL2),
+ SYS_REG(HCR_EL2),
+ SYS_REG(MDCR_EL2),
+ SYS_REG(CPTR_EL2),
+ SYS_REG(HSTR_EL2),
+ SYS_REG(HFGRTR_EL2),
+ SYS_REG(HFGWTR_EL2),
+ SYS_REG(HFGITR_EL2),
+ SYS_REG(HACR_EL2),
+ SYS_REG(ZCR_EL2),
+ SYS_REG(HCRX_EL2),
+ SYS_REG(TTBR0_EL2),
+ SYS_REG(TTBR1_EL2),
+ SYS_REG(TCR_EL2),
+ SYS_REG(TCR2_EL2),
+ SYS_REG(VTTBR_EL2),
+ SYS_REG(VTCR_EL2),
+ SYS_REG(VNCR_EL2),
+ SYS_REG(HDFGRTR2_EL2),
+ SYS_REG(HDFGWTR2_EL2),
+ SYS_REG(HFGRTR2_EL2),
+ SYS_REG(HFGWTR2_EL2),
+ SYS_REG(HDFGRTR_EL2),
+ SYS_REG(HDFGWTR_EL2),
+ SYS_REG(HAFGRTR_EL2),
+ SYS_REG(HFGITR2_EL2),
+ SYS_REG(SPSR_EL2),
+ SYS_REG(ELR_EL2),
+ SYS_REG(AFSR0_EL2),
+ SYS_REG(AFSR1_EL2),
+ SYS_REG(ESR_EL2),
+ SYS_REG(FAR_EL2),
+ SYS_REG(HPFAR_EL2),
+ SYS_REG(MAIR_EL2),
+ SYS_REG(PIRE0_EL2),
+ SYS_REG(PIR_EL2),
+ SYS_REG(POR_EL2),
+ SYS_REG(AMAIR_EL2),
+ SYS_REG(VBAR_EL2),
+ SYS_REG(CONTEXTIDR_EL2),
+ SYS_REG(TPIDR_EL2),
+ SYS_REG(CNTVOFF_EL2),
+ SYS_REG(CNTHCTL_EL2),
+ SYS_REG(CNTHP_CTL_EL2),
+ SYS_REG(CNTHP_CVAL_EL2),
+ SYS_REG(CNTHV_CTL_EL2),
+ SYS_REG(CNTHV_CVAL_EL2),
+ SYS_REG(SP_EL2),
+ SYS_REG(VDISR_EL2),
+ SYS_REG(VSESR_EL2),
+};
+
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -712,6 +802,23 @@ static __u64 pauth_generic_regs[] = {
.regs = pauth_generic_regs, \
.regs_n = ARRAY_SIZE(pauth_generic_regs), \
}
+#define EL2_SUBLIST \
+ { \
+ .name = "EL2", \
+ .capability = KVM_CAP_ARM_EL2, \
+ .feature = KVM_ARM_VCPU_HAS_EL2, \
+ .regs = el2_regs, \
+ .regs_n = ARRAY_SIZE(el2_regs), \
+ }
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -761,6 +868,124 @@ static struct vcpu_reg_list pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -768,5 +993,19 @@ struct vcpu_reg_list *vcpu_configs[] = {
&sve_pmu_config,
&pauth_config,
&pauth_pmu_config,
+
+ &el2_vregs_config,
+ &el2_vregs_pmu_config,
+ &el2_sve_config,
+ &el2_sve_pmu_config,
+ &el2_pauth_config,
+ &el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c
new file mode 100644
index 000000000000..bbe6862c6ab1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/hello_el2.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1
+ *
+ * Copyright 2025 Google LLC
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+static void guest_code(void)
+{
+ u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);
+ u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
+ u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4);
+
+ GUEST_ASSERT_EQ(get_current_el(), 2);
+ GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H);
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1),
+ ID_AA64MMFR1_EL1_VH_IMP);
+
+ /*
+ * Traps of the complete ID register space are IMPDEF without FEAT_FGT,
+ * which is really annoying to deal with in KVM describing E2H as RES1.
+ *
+ * If the implementation doesn't honor the trap then expect the register
+ * to return all zeros.
+ */
+ if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP)
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0),
+ ID_AA64MMFR0_EL1_FGT_NI);
+ else
+ GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index 44cfcf8a7f46..bf038a0371f4 100644
--- a/tools/testing/selftests/kvm/arm64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -108,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
memset(&res, 0, sizeof(res));
- smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
switch (stage) {
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c
new file mode 100644
index 000000000000..9ca9f125abdb
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Access all FEAT_IDST-handled registers that depend on more than
+ * just FEAT_AA64, and fail if we don't get an a trap with an 0x18 EC.
+ */
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool sys64, undef;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ sys64 = false; \
+ undef = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(!undef, #r " unexpected UNDEF"); \
+ __GUEST_ASSERT(sys64, #r " didn't trap"); \
+ } while(0)
+
+
+static void guest_code(void)
+{
+ check_sr_read(CCSIDR2_EL1);
+ check_sr_read(SMIDR_EL1);
+ check_sr_read(GMID_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_sys64_handler(struct ex_regs *regs)
+{
+ sys64 = true;
+ undef = false;
+ regs->pc += 4;
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ sys64 = false;
+ undef = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_feat_idst(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* This VM has no MTE, no SME, no CCIDX */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SYS64, guest_sys64_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t mmfr2;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ mmfr2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR2_EL1));
+ __TEST_REQUIRE(FIELD_GET(ID_AA64MMFR2_EL1_IDS, mmfr2) > 0,
+ "FEAT_IDST not supported");
+ kvm_vm_free(vm);
+
+ test_guest_feat_idst();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
new file mode 100644
index 000000000000..b5be9133535a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that nobody has tampered with KVM's UID
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+/*
+ * Do NOT redefine these constants, or try to replace them with some
+ * "common" version. They are hardcoded here to detect any potential
+ * breakage happening in the rest of the kernel.
+ *
+ * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74
+ */
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU
+
+static void guest_code(void)
+{
+ struct arm_smccc_res res = {};
+
+ do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
+ res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+ res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 &&
+ res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3,
+ "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3);
+ GUEST_DONE();
+}
+
+int main (int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
deleted file mode 100644
index 8b7a80a51b1c..000000000000
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
deleted file mode 100644
index ebd70430c89d..000000000000
--- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring GICv3 correctly
-// results in all of the sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r) \
- ({ \
- uint64_t val; \
- \
- handled = false; \
- dsb(sy); \
- val = read_sysreg_s(SYS_ ## r); \
- val; \
- })
-
-#define __check_sr_write(r) \
- do { \
- handled = false; \
- dsb(sy); \
- write_sysreg_s(0, SYS_ ## r); \
- isb(); \
- } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r) \
- do { \
- __check_sr_read(r); \
- __GUEST_ASSERT(handled, #r " no read trap"); \
- } while(0)
-
-#define check_sr_write(r) \
- do { \
- __check_sr_write(r); \
- __GUEST_ASSERT(handled, #r " no write trap"); \
- } while(0)
-
-#define check_sr_rw(r) \
- do { \
- check_sr_read(r); \
- check_sr_write(r); \
- } while(0)
-
-static void guest_code(void)
-{
- uint64_t val;
-
- /*
- * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
- * hidden the feature at runtime without any other userspace action.
- */
- __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
- read_sysreg(id_aa64pfr0_el1)) == 0,
- "GICv3 wrongly advertised");
-
- /*
- * Access all GICv3 registers, and fail if we don't get an UNDEF.
- * Note that we happily access all the APxRn registers without
- * checking their existance, as all we want to see is a failure.
- */
- check_sr_rw(ICC_PMR_EL1);
- check_sr_read(ICC_IAR0_EL1);
- check_sr_write(ICC_EOIR0_EL1);
- check_sr_rw(ICC_HPPIR0_EL1);
- check_sr_rw(ICC_BPR0_EL1);
- check_sr_rw(ICC_AP0R0_EL1);
- check_sr_rw(ICC_AP0R1_EL1);
- check_sr_rw(ICC_AP0R2_EL1);
- check_sr_rw(ICC_AP0R3_EL1);
- check_sr_rw(ICC_AP1R0_EL1);
- check_sr_rw(ICC_AP1R1_EL1);
- check_sr_rw(ICC_AP1R2_EL1);
- check_sr_rw(ICC_AP1R3_EL1);
- check_sr_write(ICC_DIR_EL1);
- check_sr_read(ICC_RPR_EL1);
- check_sr_write(ICC_SGI1R_EL1);
- check_sr_write(ICC_ASGI1R_EL1);
- check_sr_write(ICC_SGI0R_EL1);
- check_sr_read(ICC_IAR1_EL1);
- check_sr_write(ICC_EOIR1_EL1);
- check_sr_rw(ICC_HPPIR1_EL1);
- check_sr_rw(ICC_BPR1_EL1);
- check_sr_rw(ICC_CTLR_EL1);
- check_sr_rw(ICC_IGRPEN0_EL1);
- check_sr_rw(ICC_IGRPEN1_EL1);
-
- /*
- * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
- * be RAO/WI. Engage in non-fatal accesses, starting with a
- * write of 0 to try and disable SRE, and let's see if it
- * sticks.
- */
- __check_sr_write(ICC_SRE_EL1);
- if (!handled)
- GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
- val = __check_sr_read(ICC_SRE_EL1);
- if (!handled) {
- __GUEST_ASSERT((val & BIT(0)),
- "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
- GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
- }
-
- GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
- /* Success, we've gracefully exploded! */
- handled = true;
- regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- do {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_PRINTF:
- printf("%s", uc.buffer);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Create a VM without a GICv3 */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
- test_run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t pfr0;
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
- "GICv3 not supported.");
- kvm_vm_free(vm);
-
- test_guest_no_gicv3();
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c
new file mode 100644
index 000000000000..b14686ef17d1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/no-vgic.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3-capable system (GICv3 native, or GICv5 with
+// FEAT_GCIE_LEGACY), not configuring GICv3 correctly results in all
+// of the sysregs generating an UNDEF exception. Do the same for GICv5
+// on a GICv5 host.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#include <arm64/gic_v5.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while (0)
+
+#define __check_gicv5_gicr_op(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(GICV5_OP_GICR_ ## r); \
+ val; \
+ })
+
+#define __check_gicv5_gic_op(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, GICV5_OP_GIC_ ## r); \
+ isb(); \
+ } while (0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while (0)
+
+#define check_gicv5_gicr_op(r) \
+ do { \
+ __check_gicv5_gicr_op(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_gicv5_gic_op(r) \
+ do { \
+ __check_gicv5_gic_op(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+static void guest_code_gicv3(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existence, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_gicv5(void)
+{
+ /*
+ * Check that we advertise that ID_AA64PFR2_EL1.GCIE == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR2_EL1_GCIE,
+ read_sysreg_s(SYS_ID_AA64PFR2_EL1)) == 0,
+ "GICv5 wrongly advertised");
+
+ /*
+ * Try all GICv5 instructions, and fail if we don't get an UNDEF.
+ */
+ check_gicv5_gic_op(CDAFF);
+ check_gicv5_gic_op(CDDI);
+ check_gicv5_gic_op(CDDIS);
+ check_gicv5_gic_op(CDEOI);
+ check_gicv5_gic_op(CDHM);
+ check_gicv5_gic_op(CDPEND);
+ check_gicv5_gic_op(CDPRI);
+ check_gicv5_gic_op(CDRCFG);
+ check_gicv5_gicr_op(CDIA);
+ check_gicv5_gicr_op(CDNMIA);
+
+ /* Check General System Register acccesses */
+ check_sr_rw(ICC_APR_EL1);
+ check_sr_rw(ICC_CR0_EL1);
+ check_sr_read(ICC_HPPIR_EL1);
+ check_sr_read(ICC_IAFFIDR_EL1);
+ check_sr_rw(ICC_ICSR_EL1);
+ check_sr_read(ICC_IDR0_EL1);
+ check_sr_rw(ICC_PCR_EL1);
+
+ /* Check PPI System Register accessess */
+ check_sr_rw(ICC_PPI_CACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_CACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_CPENDR0_EL1);
+ check_sr_rw(ICC_PPI_CPENDR1_EL1);
+ check_sr_rw(ICC_PPI_SPENDR0_EL1);
+ check_sr_rw(ICC_PPI_SPENDR1_EL1);
+ check_sr_rw(ICC_PPI_ENABLER0_EL1);
+ check_sr_rw(ICC_PPI_ENABLER1_EL1);
+ check_sr_read(ICC_PPI_HMR0_EL1);
+ check_sr_read(ICC_PPI_HMR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR0_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR2_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR3_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR4_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR5_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR6_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR7_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR8_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR9_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR10_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR11_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR12_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR13_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR14_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR15_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_vgic(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GIC */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool has_v3, has_v5;
+ uint64_t pfr;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ has_v3 = !!FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR2_EL1));
+ has_v5 = !!FIELD_GET(ID_AA64PFR2_EL1_GCIE, pfr);
+
+ kvm_vm_free(vm);
+
+ __TEST_REQUIRE(has_v3 || has_v5,
+ "Neither GICv3 nor GICv5 supported.");
+
+ if (has_v3) {
+ pr_info("Testing no-vgic-v3\n");
+ test_guest_no_vgic(guest_code_gicv3);
+ } else {
+ pr_info("No GICv3 support: skipping no-vgic-v3 test\n");
+ }
+
+ if (has_v5) {
+ pr_info("Testing no-vgic-v5\n");
+ test_guest_no_vgic(guest_code_gicv5);
+ } else {
+ pr_info("No GICv5 support: skipping no-vgic-v5 test\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..4ccbd389d133 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -95,14 +95,14 @@ static bool guest_check_lse(void)
uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
uint64_t atomic;
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);
return atomic >= 2;
}
static bool guest_check_dc_zva(void)
{
uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+ uint64_t dzp = FIELD_GET(DCZID_EL0_DZP, dczid);
return dzp == 0;
}
@@ -195,11 +195,11 @@ static bool guest_set_ha(void)
uint64_t hadbs, tcr;
/* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
index ab491ee9e5f7..98e49f710aef 100644
--- a/tools/testing/selftests/kvm/arm64/psci_test.c
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
0, 0, 0, 0, &res);
return res.a0;
@@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity,
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
0, 0, 0, 0, 0, &res);
return res.a0;
@@ -48,7 +48,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
0, 0, 0, 0, 0, &res);
return res.a0;
@@ -58,7 +58,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -67,7 +67,7 @@ static uint64_t psci_features(uint32_t func_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -89,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
vm = vm_create(2);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*source = aarch64_vcpu_add(vm, 0, &init, guest_code);
*target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..573dd790aeb8
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
+ * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with expected arm_sea info, also test userspace
+ * injecting a synchronous external data abort into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The under-test platform's APEI should be unable to claim SEA. Otherwise
+ * the test will also be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static vm_paddr_t einj_gpa;
+static void *einj_hva;
+static uint64_t einj_hpa;
+static bool far_invalid;
+
+static uint64_t translate_to_host_paddr(unsigned long vaddr)
+{
+ uint64_t pinfo;
+ int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
+ int fd;
+ uint64_t page_addr;
+ uint64_t paddr;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
+ paddr = page_addr + (vaddr & (getpagesize() - 1));
+ return paddr;
+}
+
+static void write_einj_entry(const char *einj_path, uint64_t val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+ cmdfile = popen(cmd, "r");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(uint64_t paddr)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+ ksft_test_result_skip("EINJ table no available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, paddr);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ uint64_t guest_data;
+
+ /* Consumes error will cause a SEA. */
+ guest_data = *(uint64_t *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ uint64_t num_guest_pages;
+ vm_paddr_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*guest_paddr=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ uint64_t guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+ /* Populate certain data before injecting UER. */
+ *(uint64_t *)einj_hva = 0xBAADCAFE;
+ guest_data = *(uint64_t *)einj_hva;
+ ksft_print_msg("Before EINJect: data=%#lx\n",
+ guest_data);
+
+ einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 322b9d3b0125..3a7e5fe9ae7a 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -37,6 +37,9 @@ struct reg_ftr_bits {
* For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
*/
int64_t safe_val;
+
+ /* Allowed to be changed by the host after run */
+ bool mutable;
};
struct test_feature_reg {
@@ -44,7 +47,7 @@ struct test_feature_reg {
const struct reg_ftr_bits *ftr_bits;
};
-#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL, MUT) \
{ \
.name = #NAME, \
.sign = SIGNED, \
@@ -52,15 +55,20 @@ struct test_feature_reg {
.shift = SHIFT, \
.mask = MASK, \
.safe_val = SAFE_VAL, \
+ .mutable = MUT, \
}
#define REG_FTR_BITS(type, reg, field, safe_val) \
__REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
+ reg##_##field##_MASK, safe_val, false)
+
+#define REG_FTR_BITS_MUTABLE(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val, true)
#define S_REG_FTR_BITS(type, reg, field, safe_val) \
__REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
+ reg##_##field##_MASK, safe_val, false)
#define REG_FTR_END \
{ \
@@ -91,7 +99,6 @@ static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
@@ -123,20 +130,30 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSUI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
- REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ /* GICv3 support will be forced at run time if available */
+ REG_FTR_BITS_MUTABLE(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
REG_FTR_END,
};
static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
@@ -162,7 +179,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0),
REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
@@ -185,6 +204,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
@@ -210,11 +237,13 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1),
TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
};
@@ -227,11 +256,16 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
GUEST_REG_SYNC(SYS_MIDR_EL1);
GUEST_REG_SYNC(SYS_REVIDR_EL1);
@@ -243,7 +277,9 @@ static void guest_code(void)
/* Return a safe value to a given ftr_bits an ftr value */
uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -295,7 +331,9 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
/* Return an invalid value to a given ftr_bits an ftr value */
uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ uint64_t ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -543,12 +581,101 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
}
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ uint64_t mte;
+ uint64_t mte_frac;
+ int idx, err;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
+ if (!mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
+static uint64_t reset_mutable_bits(uint32_t id, uint64_t val)
+{
+ struct test_feature_reg *reg = NULL;
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ if (test_regs[i].reg == id) {
+ reg = &test_regs[i];
+ break;
+ }
+ }
+
+ if (!reg)
+ return val;
+
+ for (const struct reg_ftr_bits *bits = reg->ftr_bits; bits->type != FTR_END; bits++) {
+ if (bits->mutable) {
+ val &= ~bits->mask;
+ val |= bits->safe_val << bits->shift;
+ }
+ }
+
+ return val;
+}
+
static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
struct ucall uc;
while (!done) {
+ uint64_t val;
+
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
@@ -556,9 +683,11 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_SYNC:
+ val = test_reg_vals[encoding_to_range_idx(uc.args[2])];
+ val = reset_mutable_bits(uc.args[2], val);
+
/* Make sure the written values are seen by guest */
- TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
- uc.args[3]);
+ TEST_ASSERT_EQ(val, reset_mutable_bits(uc.args[2], uc.args[3]));
break;
case UCALL_DONE:
done = true;
@@ -584,7 +713,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
/* find the first empty level in the cache hierarchy */
- for (level = 1; level < 7; level++) {
+ for (level = 1; level <= 7; level++) {
if (!CLIDR_CTYPE(clidr, level))
break;
}
@@ -649,7 +778,8 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin
uint64_t observed;
observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
- TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+ TEST_ASSERT_EQ(reset_mutable_bits(encoding, test_reg_vals[idx]),
+ reset_mutable_bits(encoding, observed));
}
static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
@@ -679,29 +809,29 @@ int main(void)
struct kvm_vm *vm;
bool aarch64_only;
uint64_t val, el0;
- int test_cnt;
+ int test_cnt, i, j;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
+ test_wants_mte();
+
vm = vm_create(1);
vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();
- test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
- MPAM_IDREG_TEST;
+ test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ for (i = 0; i < ARRAY_SIZE(test_regs); i++)
+ for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++)
+ test_cnt++;
ksft_set_plan(test_cnt);
@@ -709,6 +839,7 @@ int main(void)
test_vcpu_ftr_id_regs(vcpu);
test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
index 2d189f3da228..1763b9d45400 100644
--- a/tools/testing/selftests/kvm/arm64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
@@ -22,8 +22,20 @@ enum smccc_conduit {
SMC_INSN,
};
+static bool test_runs_at_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ struct kvm_vcpu_init init;
+
+ kvm_get_default_vcpu_target(vm, &init);
+ kvm_vm_free(vm);
+
+ return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
#define for_each_conduit(conduit) \
- for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+ for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \
+ conduit <= SMC_INSN; conduit++)
static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
{
@@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
struct kvm_vm *vm;
vm = vm_create(1);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
/*
* Enable in-kernel emulation of PSCI to ensure that calls are denied
@@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..8d6d3a4ae4db 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
@@ -9,17 +9,18 @@
#include <asm/kvm.h>
#include <asm/kvm_para.h>
+#include <arm64/gic_v3.h>
+
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vgic.h"
+#include "gic_v3.h"
#define NR_VCPUS 4
#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-#define GICR_TYPER 0x8
-
#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
@@ -675,6 +676,44 @@ static void test_v3_its_region(void)
vm_gic_destroy(&v);
}
+static void test_v3_nassgicap(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ bool has_nassgicap;
+ struct vm_gic vm;
+ u32 typer2;
+ int ret;
+
+ vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap;
+
+ typer2 |= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ if (has_nassgicap)
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret));
+ else
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Enabled nASSGIcap even though it's unavailable");
+
+ typer2 &= ~GICD_TYPER2_nASSGIcap;
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ typer2 ^= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ TEST_ASSERT(ret && errno == EBUSY,
+ "Changed nASSGIcap after initializing the VGIC");
+
+ vm_gic_destroy(&vm);
+}
+
/*
* Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
*/
@@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type)
return 0;
}
+struct sr_def {
+ const char *name;
+ u32 encoding;
+};
+
+#define PACK_SR(r) \
+ ((sys_reg_Op0(r) << 14) | \
+ (sys_reg_Op1(r) << 11) | \
+ (sys_reg_CRn(r) << 7) | \
+ (sys_reg_CRm(r) << 3) | \
+ (sys_reg_Op2(r)))
+
+#define SR(r) \
+ { \
+ .name = #r, \
+ .encoding = r, \
+ }
+
+static const struct sr_def sysregs_el1[] = {
+ SR(SYS_ICC_PMR_EL1),
+ SR(SYS_ICC_BPR0_EL1),
+ SR(SYS_ICC_AP0R0_EL1),
+ SR(SYS_ICC_AP0R1_EL1),
+ SR(SYS_ICC_AP0R2_EL1),
+ SR(SYS_ICC_AP0R3_EL1),
+ SR(SYS_ICC_AP1R0_EL1),
+ SR(SYS_ICC_AP1R1_EL1),
+ SR(SYS_ICC_AP1R2_EL1),
+ SR(SYS_ICC_AP1R3_EL1),
+ SR(SYS_ICC_BPR1_EL1),
+ SR(SYS_ICC_CTLR_EL1),
+ SR(SYS_ICC_SRE_EL1),
+ SR(SYS_ICC_IGRPEN0_EL1),
+ SR(SYS_ICC_IGRPEN1_EL1),
+};
+
+static const struct sr_def sysregs_el2[] = {
+ SR(SYS_ICH_AP0R0_EL2),
+ SR(SYS_ICH_AP0R1_EL2),
+ SR(SYS_ICH_AP0R2_EL2),
+ SR(SYS_ICH_AP0R3_EL2),
+ SR(SYS_ICH_AP1R0_EL2),
+ SR(SYS_ICH_AP1R1_EL2),
+ SR(SYS_ICH_AP1R2_EL2),
+ SR(SYS_ICH_AP1R3_EL2),
+ SR(SYS_ICH_HCR_EL2),
+ SR(SYS_ICC_SRE_EL2),
+ SR(SYS_ICH_VTR_EL2),
+ SR(SYS_ICH_VMCR_EL2),
+ SR(SYS_ICH_LR0_EL2),
+ SR(SYS_ICH_LR1_EL2),
+ SR(SYS_ICH_LR2_EL2),
+ SR(SYS_ICH_LR3_EL2),
+ SR(SYS_ICH_LR4_EL2),
+ SR(SYS_ICH_LR5_EL2),
+ SR(SYS_ICH_LR6_EL2),
+ SR(SYS_ICH_LR7_EL2),
+ SR(SYS_ICH_LR8_EL2),
+ SR(SYS_ICH_LR9_EL2),
+ SR(SYS_ICH_LR10_EL2),
+ SR(SYS_ICH_LR11_EL2),
+ SR(SYS_ICH_LR12_EL2),
+ SR(SYS_ICH_LR13_EL2),
+ SR(SYS_ICH_LR14_EL2),
+ SR(SYS_ICH_LR15_EL2),
+};
+
+static void test_sysreg_array(int gic, const struct sr_def *sr, int nr,
+ int (*check)(int, const struct sr_def *, const char *))
+{
+ for (int i = 0; i < nr; i++) {
+ u64 val;
+ u64 attr;
+ int ret;
+
+ /* Assume MPIDR_EL1.Aff*=0 */
+ attr = PACK_SR(sr[i].encoding);
+
+ /*
+ * The API is braindead. A register can be advertised as
+ * available, and yet not be readable or writable.
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and
+ * ICH_APnR{1,2,3}_EL2 do follow suit for consistency.
+ *
+ * On the bright side, no known HW is implementing more than
+ * 5 bits of priority, so we're safe. Sort of...
+ */
+ ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr);
+ TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name);
+
+ /* Check that we can write back what we read */
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name);
+ ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name);
+ }
+}
+
+static u8 get_ctlr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICC_CTLR_EL1), &val);
+ TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable");
+
+ pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICC_AP0R1_EL1:
+ case SYS_ICC_AP1R1_EL1:
+ if (get_ctlr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICC_AP0R2_EL1:
+ case SYS_ICC_AP0R3_EL1:
+ case SYS_ICC_AP1R2_EL1:
+ case SYS_ICC_AP1R3_EL1:
+ if (get_ctlr_pribits(gic) == 7)
+ return 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static u8 get_vtr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICH_VTR_EL2), &val);
+ TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable");
+
+ pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICH_AP0R1_EL2:
+ case SYS_ICH_AP1R1_EL2:
+ if (get_vtr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICH_AP0R2_EL2:
+ case SYS_ICH_AP0R3_EL2:
+ case SYS_ICH_AP1R2_EL2:
+ case SYS_ICH_AP1R3_EL2:
+ if (get_vtr_pribits(gic) == 7)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static void test_v3_sysregs(void)
+{
+ struct kvm_vcpu_init init = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u32 feat = 0;
+ int gic;
+
+ if (kvm_check_cap(KVM_CAP_ARM_EL2))
+ feat |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ vm = vm_create(1);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= feat;
+
+ vcpu = aarch64_vcpu_add(vm, 0, &init, NULL);
+ TEST_ASSERT(vcpu, "Can't create a vcpu?");
+
+ gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ TEST_ASSERT(gic >= 0, "No GIC???");
+
+ kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs);
+ if (feat)
+ test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs);
+ else
+ pr_info("SKIP EL2 registers, not available\n");
+
+ close(gic);
+ kvm_vm_free(vm);
+}
+
void run_tests(uint32_t gic_dev_type)
{
test_vcpus_then_vgic(gic_dev_type);
@@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type)
test_v3_last_bit_single_rdist();
test_v3_redist_ipa_range_check_at_vcpu_run();
test_v3_its_region();
+ test_v3_sysregs();
+ test_v3_nassgicap();
}
}
@@ -739,6 +994,8 @@ int main(int ac, char **av)
int pa_bits;
int cnt_impl = 0;
+ test_disable_default_vgic();
+
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..2fb2c7939fe9 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ uint32_t shared_data;
};
/*
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
+ uint32_t first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
}
local_irq_enable();
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
+ /* Timer PPIs cannot be injected from userspace */
+ static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
+ BIT(30 - MIN_PPI) |
+ BIT(28 - MIN_PPI) |
+ BIT(26 - MIN_PPI));
+
if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -620,18 +643,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
* that no actual interrupt was injected for those cases.
*/
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
- }
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ fd[f] = kvm_new_eventfd();
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- struct kvm_irqfd irqfd = {
- .fd = fd[f],
- .gsi = i - MIN_SPI,
- };
assert(i <= (uint64_t)UINT_MAX);
- vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
@@ -642,7 +659,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
}
for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
+ kvm_close(fd[f]);
}
/* handles the valid case: intid=0xffffffff num=1 */
@@ -758,7 +775,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_args_set(vcpu, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
@@ -786,6 +802,221 @@ done:
kvm_vm_free(vm);
}
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ uint32_t intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ uint32_t intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ vm_vaddr_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void help(const char *name)
{
printf(
@@ -808,6 +1039,9 @@ int main(int argc, char **argv)
int opt;
bool eoi_split = false;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ test_disable_default_vgic();
+
while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
switch (opt) {
case 'n':
@@ -839,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index fc4fe52fb6f8..e857a605f577 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -27,7 +27,7 @@ static vm_paddr_t gpa_base;
static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
+static int its_fd;
static struct test_data {
bool request_vcpus_stop;
@@ -118,11 +118,16 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)
{
guest_setup_gic();
+ local_irq_enable();
GUEST_SYNC(0);
@@ -214,9 +219,6 @@ static void setup_test_data(void)
static void setup_gic(void)
{
- gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
its_fd = vgic_its_setup(vm);
}
@@ -334,7 +336,7 @@ static void setup_vm(void)
{
int i;
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
@@ -355,7 +357,6 @@ static void setup_vm(void)
static void destroy_vm(void)
{
close(its_fd);
- close(gic_fd);
kvm_vm_free(vm);
free(vcpus);
}
@@ -374,6 +375,8 @@ int main(int argc, char **argv)
u32 nr_threads;
int c;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
switch (c) {
case 'v':
diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c
new file mode 100644
index 000000000000..3ce6cf37a629
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include <arm64/gic_v5.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS 1
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+#define GUEST_CMD_IRQ_CDIA 10
+#define GUEST_CMD_IRQ_DIEOI 11
+#define GUEST_CMD_IS_AWAKE 12
+#define GUEST_CMD_IS_READY 13
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ bool valid;
+ u32 hwirq;
+ u64 ia;
+ static int count;
+
+ /*
+ * We have pending interrupts. Should never actually enter WFI
+ * here!
+ */
+ wfi();
+ GUEST_SYNC(GUEST_CMD_IS_AWAKE);
+
+ ia = gicr_insn(CDIA);
+ valid = GICV5_GICR_CDIA_VALID(ia);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_CDIA);
+
+ if (!valid)
+ return;
+
+ gsb_ack();
+ isb();
+
+ hwirq = FIELD_GET(GICV5_GICR_CDIA_INTID, ia);
+
+ gic_insn(hwirq, CDDI);
+ gic_insn(0, CDEOI);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_DIEOI);
+
+ if (++count >= 2)
+ GUEST_DONE();
+
+ /* Ask for the next interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+}
+
+static void guest_code(void)
+{
+ local_irq_disable();
+
+ gicv5_cpu_enable_interrupts();
+ local_irq_enable();
+
+ /* Enable the SW_PPI (3) */
+ write_sysreg_s(BIT_ULL(3), SYS_ICC_PPI_ENABLER0_EL1);
+
+ /* Ask for the first interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+
+ /* Loop forever waiting for interrupts */
+ while (1);
+}
+
+
+/* we don't want to assert on run execution, hence that helper */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+static void test_vgic_v5_ppis(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct ucall uc;
+ u64 user_ppis[2];
+ struct vm_gic v;
+ int ret, i;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = __vm_create(VM_SHAPE_DEFAULT, NR_VCPUS, 0);
+
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ vm_init_descriptor_tables(v.vm);
+ vm_install_exception_handler(v.vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Read out the PPIs that user space is allowed to drive. */
+ kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_USERSPACE_PPIS, &user_ppis);
+
+ /* We should always be able to drive the SW_PPI. */
+ TEST_ASSERT(user_ppis[0] & BIT(GICV5_ARCH_PPI_SW_PPI),
+ "SW_PPI is not drivable by userspace");
+
+ while (1) {
+ ret = run_vcpu(vcpus[0]);
+
+ switch (get_ucall(vcpus[0], &uc)) {
+ case UCALL_SYNC:
+ /*
+ * The guest is ready for the next level change. Set
+ * high if ready, and lower if it has been consumed.
+ */
+ if (uc.args[1] == GUEST_CMD_IS_READY ||
+ uc.args[1] == GUEST_CMD_IRQ_DIEOI) {
+ u64 irq;
+ bool level = uc.args[1] == GUEST_CMD_IRQ_DIEOI ? 0 : 1;
+
+ irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3);
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ _kvm_irq_line(v.vm, irq, level);
+ } else if (uc.args[1] == GUEST_CMD_IS_AWAKE) {
+ pr_info("Guest skipping WFI due to pending IRQ\n");
+ } else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) {
+ pr_info("Guest acknowledged IRQ\n");
+ }
+
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ TEST_ASSERT(ret == 0, "Failed to test GICv5 PPIs");
+
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V5).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+ /* try to create a non existing KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ pr_info("Test VGICv5 PPIs\n");
+ test_vgic_v5_ppis(gic_dev_type);
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+
+ test_disable_default_vgic();
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5);
+ if (ret) {
+ pr_info("No GICv5 support; Not running GIC_v5 tests.\n");
+ exit(KSFT_SKIP);
+ }
+
+ pr_info("Running VGIC_V5 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V5);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f16b3b27e32e..ae36325c022f 100644
--- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -28,7 +28,6 @@
struct vpmu_vm {
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- int gic_fd;
};
static struct vpmu_vm vpmu_vm;
@@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr)
return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
-{
- u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
static uint64_t get_counters_mask(uint64_t n)
{
uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
@@ -415,10 +409,6 @@ static void create_vpmu_vm(void *guest_code)
.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
.addr = (uint64_t)&irq,
};
- struct kvm_device_attr init_attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_INIT,
- };
/* The test creates the vpmu_vm multiple times. Ensure a clean state */
memset(&vpmu_vm, 0, sizeof(vpmu_vm));
@@ -431,29 +421,25 @@ static void create_vpmu_vm(void *guest_code)
}
/* Create vCPU with PMUv3 */
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
- __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
- "Failed to create vgic-v3, skipping");
+
+ kvm_arch_vm_finalize_vcpus(vpmu_vm.vm);
/* Make sure that PMUv3 support is indicated in the ID register */
dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0);
TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
"Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
- /* Initialize vPMU */
vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
- vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
}
static void destroy_vpmu_vm(void)
{
- close(vpmu_vm.gic_fd);
kvm_vm_free(vpmu_vm.vm);
}
@@ -475,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
}
}
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail)
{
struct kvm_vcpu *vcpu;
- uint64_t pmcr, pmcr_orig;
+ unsigned int prev;
+ int ret;
create_vpmu_vm(guest_code);
vcpu = vpmu_vm.vcpu;
- pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
- pmcr = pmcr_orig;
+ prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)));
- /*
- * Setting a larger value of PMCR.N should not modify the field, and
- * return a success.
- */
- set_pmcr_n(&pmcr, pmcr_n);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
- pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
if (expect_fail)
- TEST_ASSERT(pmcr_orig == pmcr,
- "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
- pmcr, pmcr_n);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded",
+ nr_counters, prev);
else
- TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
- "Failed to update PMCR.N to %lu (received: %lu)",
- pmcr_n, get_pmcr_n(pmcr));
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL);
}
/*
@@ -516,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n)
pr_debug("Test with pmcr_n %lu\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
/* Save the initial sp to restore them later to run the guest again */
- sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+ sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));
run_vcpu(vcpu, pmcr_n);
@@ -528,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n)
* Reset and re-initialize the vCPU, and run the guest code again to
* check if PMCR_EL0.N is preserved.
*/
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
aarch64_vcpu_setup(vcpu, &init);
vcpu_init_descriptor_tables(vcpu);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp);
vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
run_vcpu(vcpu, pmcr_n);
@@ -557,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
uint64_t set_reg_id, clr_reg_id, reg_val;
uint64_t valid_counters_mask, max_counters_mask;
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
valid_counters_mask = get_counters_mask(pmcr_n);
@@ -611,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n)
{
pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, true);
destroy_vpmu_vm();
}
@@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void)
return get_pmcr_n(pmcr);
}
+static bool kvm_supports_nr_counters_attr(void)
+{
+ bool supported;
+
+ create_vpmu_vm(NULL);
+ supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS);
+ destroy_vpmu_vm();
+
+ return supported;
+}
+
int main(void)
{
uint64_t i, pmcr_n;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ TEST_REQUIRE(kvm_supports_nr_counters_attr());
pmcr_n = get_pmcr_n_limit();
for (i = 0; i <= pmcr_n; i++) {
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 8835fed09e9f..96d874b239eb 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,5 +1,6 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
CONFIG_USERFAULTFD=y
CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e79817bd0e29..0a1ea1d1e2d8 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -20,38 +20,6 @@
#include "guest_modes.h"
#include "ucall_common.h"
-#ifdef __aarch64__
-#include "arm64/vgic.h"
-
-static int gic_fd;
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
- /*
- * The test can still run even if hardware does not support GICv3, as it
- * is only an optimization to reduce guest exits.
- */
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
- if (gic_fd > 0)
- close(gic_fd);
-}
-
-#else /* __aarch64__ */
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
-}
-
-#endif
-
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
@@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
dirty_log_manual_caps);
- arch_setup_vm(vm, nr_vcpus);
-
/* Start the iterations */
iteration = 0;
host_quit = false;
@@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
memstress_free_bitmaps(bitmaps, p->slots);
- arch_cleanup_vm(vm);
memstress_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 23593d9eeba9..7627b328f18a 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -585,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
log_mode_create_vm_done(vm);
*vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -640,9 +641,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
-
/*
* The workaround in guest_code() to write all pages prior to the first
* iteration isn't compatible with the dirty ring, as the dirty ring
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
index 91f05f78e824..f4644c9d2d3b 100644
--- a/tools/testing/selftests/kvm/get-reg-list.c
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
}
#ifdef __aarch64__
-static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c,
+ struct kvm_vcpu_init *init)
{
struct vcpu_reg_sublist *s;
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init);
+
for_each_sublist(c, s)
if (s->capability)
init->features[s->feature / 32] |= 1 << (s->feature % 32);
@@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini
static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
{
- struct kvm_vcpu_init init = { .target = -1, };
+ struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
- prepare_vcpu_init(c, &init);
+ prepare_vcpu_init(vm, c, &init);
vcpu = __vm_vcpu_add(vm, 0);
aarch64_vcpu_setup(vcpu, &init);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ce687f8d248f..ec7644aae999 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -13,14 +13,19 @@
#include <linux/bitmap.h>
#include <linux/falloc.h>
+#include <linux/sizes.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "kvm_util.h"
+#include "numaif.h"
#include "test_util.h"
+#include "ucall_common.h"
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
{
char buf[64];
@@ -34,15 +39,235 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap(int fd, size_t page_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+ int ret;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
+ page_size);
+ TEST_ASSERT(!ret, "fallocate the first page should succeed.");
+
+ for (i = 0; i < page_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
+ for (; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ memset(mem, val, page_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = BITS_PER_TYPE(nodemask);
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_collapse(int fd, uint64_t flags)
+{
+ const size_t pmd_size = get_trans_hugepagesz();
+ void *reserved_addr;
+ void *aligned_addr;
+ char *mem;
+ off_t i;
+
+ /*
+ * To even reach the point where the guest_memfd folios will
+ * get collapsed, both the userspace address and the offset
+ * within the guest_memfd have to be aligned to pmd_size.
+ *
+ * To achieve that alignment, reserve virtual address space
+ * with regular mmap, then use MAP_FIXED to allocate memory
+ * from a pmd_size-aligned offset (0) at a known, available
+ * virtual address.
+ */
+ reserved_addr = kvm_mmap(pmd_size * 2, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
+ aligned_addr = align_ptr_up(reserved_addr, pmd_size);
+
+ mem = mmap(aligned_addr, pmd_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, fd, 0);
+ TEST_ASSERT(IS_ALIGNED((u64)mem, pmd_size),
+ "Userspace address must be aligned to PMD size.");
+
+ /*
+ * Use reads to populate page table to avoid setting dirty
+ * flag on page.
+ */
+ for (i = 0; i < pmd_size; i += getpagesize())
+ READ_ONCE(mem[i]);
+
+ /*
+ * Advising the use of huge pages in guest_memfd should be
+ * fine...
+ */
+ kvm_madvise(mem, pmd_size, MADV_HUGEPAGE);
+
+ /*
+ * ... but collapsing folios must not be supported to avoid
+ * mapping beyond shared ranges into host userspace page
+ * tables.
+ */
+ TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+
+ /*
+ * Removing from host page tables and re-faulting should be
+ * fine; should not end up faulting in a collapsed/huge folio.
+ */
+ kvm_madvise(mem, pmd_size, MADV_DONTNEED);
+ READ_ONCE(mem[0]);
+
+ kvm_munmap(reserved_addr, pmd_size * 2);
+}
+
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
+
+ for (i = 0; i < accessible_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
+}
+
+static void test_mmap_not_supported(int fd, size_t total_size)
{
char *mem;
mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
TEST_ASSERT_EQ(mem, MAP_FAILED);
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
}
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
int ret;
@@ -53,7 +278,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
{
int ret;
@@ -90,7 +315,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
{
struct {
off_t offset;
@@ -120,26 +345,18 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
}
}
-static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
+static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
+ uint64_t guest_memfd_flags)
{
- size_t page_size = getpagesize();
- uint64_t flag;
size_t size;
int fd;
for (size = 1; size < page_size; size++) {
- fd = __vm_create_guest_memfd(vm, size, 0);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
+ fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
"guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
size);
}
-
- for (flag = BIT(0); flag; flag <<= 1) {
- fd = __vm_create_guest_memfd(vm, page_size, flag);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
- "guest_memfd() with flag '0x%lx' should fail with EINVAL",
- flag);
- }
}
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
@@ -147,53 +364,193 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
int fd1, fd2, ret;
struct stat st1, st2;
- fd1 = __vm_create_guest_memfd(vm, 4096, 0);
+ fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
- fd2 = __vm_create_guest_memfd(vm, 8192, 0);
+ fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
ret = fstat(fd2, &st2);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size");
+ TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
close(fd2);
close(fd1);
}
-int main(int argc, char *argv[])
+static void test_guest_memfd_flags(struct kvm_vm *vm)
{
- size_t page_size;
- size_t total_size;
+ uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ uint64_t flag;
int fd;
- struct kvm_vm *vm;
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+ for (flag = BIT(0); flag; flag <<= 1) {
+ fd = __vm_create_guest_memfd(vm, page_size, flag);
+ if (flag & valid_flags) {
+ TEST_ASSERT(fd >= 0,
+ "guest_memfd() with flag '0x%lx' should succeed",
+ flag);
+ close(fd);
+ } else {
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
+ "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+ flag);
+ }
+ }
+}
- page_size = getpagesize();
- total_size = page_size * 4;
+#define __gmem_test(__test, __vm, __flags, __gmem_size) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, __gmem_size, __flags); \
+ \
+ test_##__test(fd, __gmem_size); \
+ close(fd); \
+} while (0)
- vm = vm_create_barebones();
+#define gmem_test(__test, __vm, __flags) \
+ __gmem_test(__test, __vm, __flags, page_size * 4)
- test_create_guest_memfd_invalid(vm);
+static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
+{
test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
+
+ gmem_test(file_read_write, vm, flags);
+
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ size_t pmd_size = get_trans_hugepagesz();
+
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
+ __gmem_test(collapse, vm, flags, pmd_size);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
+
+ gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
+
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
+
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ uint64_t flags;
+
+ test_guest_memfd_flags(vm);
+
+ __test_guest_memfd(vm, 0);
+
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
+
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code(uint8_t *mem, uint64_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ __GUEST_ASSERT(mem[i] == 0xaa,
+ "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);
+
+ memset(mem, 0xff, size);
+ GUEST_DONE();
+}
+
+static void test_guest_memfd_guest(void)
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables, and low memory contains
+ * the PCI hole and other MMIO regions that need to be avoided.
+ */
+ const uint64_t gpa = SZ_4G;
+ const int slot = 1;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint8_t *mem;
+ size_t size;
+ int fd, i;
+
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
+ return;
+
+ vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
+
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+
+ size = vm->page_size;
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+ vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
- fd = vm_create_guest_memfd(vm, total_size, 0);
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ memset(mem, 0xaa, size);
+ kvm_munmap(mem, size);
- test_file_read_write(fd);
- test_mmap(fd, page_size);
- test_file_size(fd, page_size, total_size);
- test_fallocate(fd, page_size, total_size);
- test_invalid_punch_hole(fd, page_size, total_size);
+ virt_pg_map(vm, gpa, gpa);
+ vcpu_args_set(vcpu, 2, gpa, size);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ for (i = 0; i < size; i++)
+ TEST_ASSERT_EQ(mem[i], 0xff);
close(fd);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long vm_types, vm_type;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+
+ page_size = getpagesize();
+
+ /*
+ * Not all architectures support KVM_CAP_VM_TYPES. However, those that
+ * support guest_memfd have that support for the default VM type.
+ */
+ vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
+ if (!vm_types)
+ vm_types = BIT(VM_TYPE_DEFAULT);
+
+ for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
+ test_guest_memfd(vm_type);
+
+ test_guest_memfd_guest();
}
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
index bf461de34785..e2c4e9f0010f 100644
--- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
@@ -155,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
timer_set_tval(timer, msec_to_cycles(msec));
}
+static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
+static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..cc7a7f34ed37 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..58feef3eb386 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v5.h b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
new file mode 100644
index 000000000000..eb523d9277cf
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SELFTESTS_GIC_V5_H
+#define __SELFTESTS_GIC_V5_H
+
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+#include <linux/bitfield.h>
+
+#include "processor.h"
+
+/*
+ * Definitions for GICv5 instructions for the Current Domain
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GICR_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GICR_CDIA_VALID_MASK, r)
+#define GICV5_GICR_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDIA_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GICR_CDIA_INTID GENMASK_ULL(31, 0)
+
+/* Definitions for GICR CDNMIA */
+#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r)
+#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
+
+#define __GIC_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
+
+#define GSB_SYS_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 1, 31)
+
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+#define GICV5_IRQ_DEFAULT_PRI 0b10000
+
+#define GICV5_ARCH_PPI_SW_PPI 0x3
+
+void gicv5_ppi_priority_init(void)
+{
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR0_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR1_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR2_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR3_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR4_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR5_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR6_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR7_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR8_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR9_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR10_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR11_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR12_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR13_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR14_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR15_EL1);
+
+ /*
+ * Context syncronization required to make sure system register writes
+ * effects are synchronised.
+ */
+ isb();
+}
+
+void gicv5_cpu_disable_interrupts(void)
+{
+ u64 cr0;
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+void gicv5_cpu_enable_interrupts(void)
+{
+ u64 cr0, pcr;
+
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1);
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1);
+
+ gicv5_ppi_priority_init();
+
+ pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_DEFAULT_PRI);
+ write_sysreg_s(pcr, SYS_ICC_PCR_EL1);
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index e43a57d99b56..4a2033708227 100644
--- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
@@ -2,6 +2,11 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
-struct kvm_vm_arch {};
+struct kvm_mmu_arch {};
+
+struct kvm_vm_arch {
+ bool has_gic;
+ int gic_fd;
+};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..ac97a1c436fc 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,6 +62,71 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_EPD1_SHIFT 23
+#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_TBI1 (UL(1) << 38)
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -102,12 +167,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);
@@ -120,6 +179,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
static inline void cpu_relax(void)
@@ -199,6 +259,16 @@ static inline void local_irq_disable(void)
asm volatile("msr daifset, #3" : : : "memory");
}
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
@@ -235,4 +305,87 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
/* Execute a Wait For Interrupt instruction. */
void wfi(void);
+void test_wants_mte(void);
+void test_disable_default_vgic(void);
+
+bool vm_supports_el2(struct kvm_vm *vm);
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+{
+ return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
+#define MAPPED_EL2_SYSREG(el2, el1) \
+ case SYS_##el1: \
+ if (vcpu_has_el2(vcpu)) \
+ alias = SYS_##el2; \
+ break
+
+
+static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding)
+{
+ u32 alias = encoding;
+
+ BUILD_BUG_ON(!__builtin_constant_p(encoding));
+
+ switch (encoding) {
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1);
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1);
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1);
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1);
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1);
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1);
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1);
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1);
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1);
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1);
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1);
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1);
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1);
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1);
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1);
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1);
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1);
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1);
+ MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1);
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1);
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1);
+ MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1);
+ case SYS_SP_EL1:
+ if (!vcpu_has_el2(vcpu))
+ return ARM64_CORE_REG(sp_el1);
+
+ alias = SYS_SP_EL2;
+ break;
+ default:
+ BUILD_BUG();
+ }
+
+ return KVM_ARM64_SYS_REG(alias);
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init);
+
+static inline unsigned int get_current_el(void)
+{
+ return (read_sysreg(CurrentEL) >> 2) & 0x3;
+}
+
+#define do_smccc(...) \
+do { \
+ if (get_current_el() == 2) \
+ smccc_smc(__VA_ARGS__); \
+ else \
+ smccc_hvc(__VA_ARGS__); \
+} while (0)
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
index c481d0c00a5d..688beccc9436 100644
--- a/tools/testing/selftests/kvm/include/arm64/vgic.h
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
@@ -16,6 +16,9 @@
((uint64_t)(flags) << 12) | \
index)
+bool kvm_supports_vgic_v3(void);
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+void __vgic_v3_init(int fd);
int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
#define VGIC_MAX_RESERVED 1023
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..843c9904c46f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+#include <sys/syscall.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftest rarely/never specific an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyways.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+__KVM_SYSCALL_DEFINE(madvise, 3, void *, addr, size_t, length, int, advice);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 373912464fb4..f861242b4ae8 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -18,8 +18,12 @@
#include <asm/atomic.h>
#include <asm/kvm.h>
+#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_syscalls.h"
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -61,6 +65,9 @@ struct kvm_vcpu {
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+#ifdef __aarch64__
+ struct kvm_vcpu_init init;
+#endif
struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
uint32_t fetch_index;
@@ -81,12 +88,19 @@ enum kvm_mem_region_type {
NR_MEM_REGIONS,
};
+struct kvm_mmu {
+ bool pgd_created;
+ uint64_t pgd;
+ int pgtable_levels;
+
+ struct kvm_mmu_arch arch;
+};
+
struct kvm_vm {
int mode;
unsigned long type;
int kvm_fd;
int fd;
- unsigned int pgtable_levels;
unsigned int page_size;
unsigned int page_shift;
unsigned int pa_bits;
@@ -97,13 +111,18 @@ struct kvm_vm {
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
- bool pgd_created;
vm_paddr_t ucall_mmio_addr;
- vm_paddr_t pgd;
vm_vaddr_t handlers;
uint32_t dirty_ring_size;
uint64_t gpa_tag_mask;
+ /*
+ * "mmu" is the guest's stage-1, with a short name because the vast
+ * majority of tests only care about the stage-1 MMU.
+ */
+ struct kvm_mmu mmu;
+ struct kvm_mmu stage2_mmu;
+
struct kvm_vm_arch arch;
struct kvm_binary_stats stats;
@@ -171,13 +190,25 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
+
+ VM_MODE_P56V57_4K, /* For riscv64 */
+ VM_MODE_P56V48_4K,
+ VM_MODE_P56V39_4K,
+ VM_MODE_P50V57_4K,
+ VM_MODE_P50V48_4K,
+ VM_MODE_P50V39_4K,
+ VM_MODE_P41V57_4K,
+ VM_MODE_P41V48_4K,
+ VM_MODE_P41V39_4K,
+
NUM_VM_MODES,
};
@@ -202,17 +233,17 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
shape; \
})
-#if defined(__aarch64__)
-
extern enum vm_guest_mode vm_mode_default;
+#if defined(__aarch64__)
+
#define VM_MODE_DEFAULT vm_mode_default
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -228,7 +259,12 @@ extern enum vm_guest_mode vm_mode_default;
#error "RISC-V 32-bit kvm selftests not supported"
#endif
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -247,16 +283,22 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help);
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
-bool get_kvm_param_bool(const char *param);
-bool get_kvm_intel_param_bool(const char *param);
-bool get_kvm_amd_param_bool(const char *param);
+int kvm_get_module_param_integer(const char *module_name, const char *param);
+bool kvm_get_module_param_bool(const char *module_name, const char *param);
-int get_kvm_param_integer(const char *param);
-int get_kvm_intel_param_integer(const char *param);
-int get_kvm_amd_param_integer(const char *param);
+static inline bool get_kvm_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm", param);
+}
+
+static inline int get_kvm_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm", param);
+}
unsigned int kvm_check_cap(long cap);
@@ -265,9 +307,6 @@ static inline bool kvm_has_cap(long cap)
return kvm_check_cap(cap);
}
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -496,6 +535,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
return fd;
}
+static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = eventfd,
+ .gsi = gsi,
+ .flags = flags,
+ .resamplefd = -1,
+ };
+
+ return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+ uint32_t flags)
+{
+ int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+ int fd = eventfd(0, 0);
+
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+ return fd;
+}
+
static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
{
ssize_t ret;
@@ -549,6 +627,41 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_smt_on(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
+
+ return false;
+}
+
void vm_create_irqchip(struct kvm_vm *vm);
static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
@@ -583,12 +696,12 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
uint32_t guest_memfd, uint64_t guest_memfd_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
+ enum vm_mem_backing_src_type src_type,
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd_fd, uint64_t guest_memfd_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -598,6 +711,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
@@ -848,7 +962,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
* VM VCPU Args Set
*
* Input Args:
- * vm - Virtual Machine
+ * vcpu - vCPU
* num - number of arguments
* ... - arguments, each of type uint64_t
*
@@ -972,7 +1086,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
void kvm_set_files_rlimit(uint32_t nr_vcpus);
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+ int r;
+
+ r = __pin_task_to_cpu(task, cpu);
+ TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+ int cpu = sched_getcpu();
+
+ pin_task_to_cpu(task, cpu);
+ return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+ pin_task_to_cpu(pthread_self(), cpu);
+}
+
+static inline int pin_self_to_any_cpu(void)
+{
+ return pin_task_to_any_cpu(pthread_self());
+}
+
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
int nr_vcpus);
@@ -986,10 +1127,6 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
unsigned int n;
n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
return n;
}
@@ -1086,6 +1223,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
virt_arch_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
@@ -1139,17 +1277,26 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
}
+static inline uint64_t vm_page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size - 1) & ~(vm->page_size - 1);
+}
+
/*
- * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * Arch hook that is invoked via a constructor, i.e. before executing main(),
* to allow for arch-specific setup that is common to all tests, e.g. computing
* the default guest "mode".
*/
void kvm_selftest_arch_init(void);
-void kvm_arch_vm_post_create(struct kvm_vm *vm);
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus);
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm);
+void kvm_arch_vm_release(struct kvm_vm *vm);
bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
uint32_t guest_get_vcpuid(void);
+bool kvm_arch_has_default_irqchip(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
index ec787b97cf18..0366e9bce7f9 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_types.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -17,4 +17,6 @@
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+#define INVALID_GPA (~(uint64_t)0)
+
#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..2ed106b32c81
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is constant 100MHZ */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..d5095900e442
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_mmu_arch {};
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/pmu.h b/tools/testing/selftests/kvm/include/loongarch/pmu.h
new file mode 100644
index 000000000000..478e6a9bbb2b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/pmu.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * LoongArch PMU specific interface
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include "processor.h"
+
+#define LOONGARCH_CPUCFG6 0x6
+#define CPUCFG6_PMP BIT(0)
+#define CPUCFG6_PAMVER GENMASK(3, 1)
+#define CPUCFG6_PMNUM GENMASK(7, 4)
+#define CPUCFG6_PMNUM_SHIFT 4
+#define CPUCFG6_PMBITS GENMASK(13, 8)
+#define CPUCFG6_PMBITS_SHIFT 8
+#define CPUCFG6_UPM BIT(14)
+
+/* Performance Counter registers */
+#define LOONGARCH_CSR_PERFCTRL0 0x200 /* perf event 0 config */
+#define LOONGARCH_CSR_PERFCNTR0 0x201 /* perf event 0 count value */
+#define LOONGARCH_CSR_PERFCTRL1 0x202 /* perf event 1 config */
+#define LOONGARCH_CSR_PERFCNTR1 0x203 /* perf event 1 count value */
+#define LOONGARCH_CSR_PERFCTRL2 0x204 /* perf event 2 config */
+#define LOONGARCH_CSR_PERFCNTR2 0x205 /* perf event 2 count value */
+#define LOONGARCH_CSR_PERFCTRL3 0x206 /* perf event 3 config */
+#define LOONGARCH_CSR_PERFCNTR3 0x207 /* perf event 3 count value */
+#define CSR_PERFCTRL_PLV0 BIT(16)
+#define CSR_PERFCTRL_PLV1 BIT(17)
+#define CSR_PERFCTRL_PLV2 BIT(18)
+#define CSR_PERFCTRL_PLV3 BIT(19)
+#define CSR_PERFCTRL_PMIE BIT(20)
+#define PMU_ENVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3)
+
+/* Hardware event codes (from LoongArch perf_event.c */
+#define LOONGARCH_PMU_EVENT_CYCLES 0x00 /* CPU cycles */
+#define LOONGARCH_PMU_EVENT_INSTR_RETIRED 0x01 /* Instructions retired */
+#define PERF_COUNT_HW_BRANCH_INSTRUCTIONS 0x02 /* Branch instructions */
+#define PERF_COUNT_HW_BRANCH_MISSES 0x03 /* Branch misses */
+
+#define NUM_LOOPS 1000
+#define EXPECTED_INSTR_MIN (NUM_LOOPS + 10) /* Loop + overhead */
+#define EXPECTED_CYCLES_MIN NUM_LOOPS /* At least 1 cycle per iteration */
+#define UPPER_BOUND (10 * NUM_LOOPS)
+
+#define PMU_OVERFLOW (1ULL << 63)
+
+static inline void pmu_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_PMU;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void pmu_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_PMU;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..93dc1fbd2e79
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_PMU 10
+#define ECFGF_PMU (BIT_ULL(ECFGB_PMU))
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_PMI 10 /* PMU interrupt */
+#define INT_TI 11 /* Timer interrupt*/
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define read_cpucfg(reg) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "cpucfg %0, %1\n\t" \
+ : "=r" (__v) \
+ : "r" (reg) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+ unsigned long prmd;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#define VECTOR_NUM 64
+
+typedef void(*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void loongarch_vcpu_setup(struct kvm_vcpu *vcpu);
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+ * if the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 5f389166338c..4dade8c4d18e 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -9,8 +9,22 @@
#include <linux/stringify.h>
#include <asm/csr.h>
+#include <asm/vdso/processor.h>
#include "kvm_util.h"
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
uint64_t idx, uint64_t size)
{
@@ -60,7 +74,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
-struct ex_regs {
+struct pt_regs {
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -92,16 +107,19 @@ struct ex_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- unsigned long epc;
+ /* Supervisor/Machine CSRs */
unsigned long status;
+ unsigned long badaddr;
unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#define NR_VECTORS 2
#define NR_EXCEPTIONS 32
#define EC_MASK (NR_EXCEPTIONS - 1)
-typedef void(*exception_handler_fn)(struct ex_regs *);
+typedef void(*exception_handler_fn)(struct pt_regs *);
void vm_init_vector_tables(struct kvm_vm *vm);
void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
@@ -174,4 +192,6 @@ static inline void local_irq_disable(void)
csr_clear(CSR_SSTATUS, SR_SIE);
}
+unsigned long riscv64_get_satp_mode(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
index 046b432ae896..16f1815ac48f 100644
--- a/tools/testing/selftests/kvm/include/riscv/sbi.h
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_GENERAL_MAX,
};
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RCVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RCVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
/* SBI PMU counter types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 77d13d7920cb..b4872ba8ed12 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -78,6 +80,23 @@ do { \
__builtin_unreachable(); \
} while (0)
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
+
size_t parse_size(const char *size);
int64_t timespec_to_ns(struct timespec ts);
@@ -153,6 +172,7 @@ bool is_backing_src_hugetlb(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index 80fe9f69b38d..5ca6bacbd70e 100644
--- a/tools/testing/selftests/kvm/include/x86/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -28,10 +28,13 @@
#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
#define APIC_TASKPRI 0x80
#define APIC_PROCPRI 0xA0
+#define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4)
+#define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) | (y << 4))
#define APIC_EOI 0xB0
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ISR 0x100
#define APIC_IRR 0x200
#define APIC_ICR 0x300
#define APIC_LVTCMCI 0x2f0
@@ -67,6 +70,10 @@
#define APIC_TMICT 0x380
#define APIC_TMCCT 0x390
#define APIC_TDCR 0x3E0
+#define APIC_SELF_IPI 0x3F0
+
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
void apic_disable(void);
void xapic_enable(void);
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 972bb1c4ab4c..be35d26bb320 100644
--- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -10,6 +10,28 @@
extern bool is_forced_emulation_enabled;
+struct pte_masks {
+ uint64_t present;
+ uint64_t writable;
+ uint64_t user;
+ uint64_t readable;
+ uint64_t executable;
+ uint64_t accessed;
+ uint64_t dirty;
+ uint64_t huge;
+ uint64_t nx;
+ uint64_t c;
+ uint64_t s;
+
+ uint64_t always_set;
+};
+
+struct kvm_mmu_arch {
+ struct pte_masks pte_masks;
+};
+
+struct kvm_mmu;
+
struct kvm_vm_arch {
vm_vaddr_t gdt;
vm_vaddr_t tss;
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 3c10c4dc0ae8..72575eadb63a 100644
--- a/tools/testing/selftests/kvm/include/x86/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
@@ -5,8 +5,11 @@
#ifndef SELFTEST_KVM_PMU_H
#define SELFTEST_KVM_PMU_H
+#include <stdbool.h>
#include <stdint.h>
+#include <linux/bits.h>
+
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
/*
@@ -61,6 +64,11 @@
#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02)
+#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00)
+#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01)
+#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02)
+#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01)
#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
@@ -80,6 +88,11 @@ enum intel_pmu_architectural_events {
INTEL_ARCH_BRANCHES_RETIRED_INDEX,
INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX,
+ INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_RETIRING_INDEX,
+ INTEL_ARCH_LBR_INSERTS_INDEX,
NR_INTEL_ARCH_EVENTS,
};
@@ -94,4 +107,17 @@ enum amd_pmu_zen_events {
extern const uint64_t intel_pmu_arch_events[];
extern const uint64_t amd_pmu_zen_events[];
+enum pmu_errata {
+ INSTRUCTIONS_RETIRED_OVERCOUNT,
+ BRANCHES_RETIRED_OVERCOUNT,
+};
+extern uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void);
+
+static inline bool this_pmu_has_errata(enum pmu_errata errata)
+{
+ return pmu_errata_mask & BIT_ULL(errata);
+}
+
#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 32ab6ca7ec32..d8634a760a60 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -21,6 +21,8 @@
extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
+extern bool host_cpu_is_hygon;
+extern bool host_cpu_is_amd_compatible;
extern uint64_t guest_tsc_khz;
#ifndef MAX_NR_CPUID_ENTRIES
@@ -34,6 +36,8 @@ extern uint64_t guest_tsc_khz;
#define NMI_VECTOR 0x02
+const char *ex_str(int vector);
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -199,10 +203,12 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
@@ -264,7 +270,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
@@ -331,6 +337,11 @@ struct kvm_x86_pmu_feature {
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8)
+#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9)
+#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10)
+#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11)
+#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12)
#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
@@ -354,16 +365,6 @@ static inline unsigned int x86_model(unsigned int eax)
return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK BIT_ULL(0)
-#define PTE_WRITABLE_MASK BIT_ULL(1)
-#define PTE_USER_MASK BIT_ULL(2)
-#define PTE_ACCESSED_MASK BIT_ULL(5)
-#define PTE_DIRTY_MASK BIT_ULL(6)
-#define PTE_LARGE_MASK BIT_ULL(7)
-#define PTE_GLOBAL_MASK BIT_ULL(8)
-#define PTE_NX_MASK BIT_ULL(63)
-
#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
#define PAGE_SHIFT 12
@@ -428,8 +429,10 @@ struct kvm_x86_state {
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
- return ((uint64_t)desc->base3 << 32) |
- (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+ return (uint64_t)desc->base3 << 32 |
+ (uint64_t)desc->base2 << 24 |
+ (uint64_t)desc->base1 << 16 |
+ (uint64_t)desc->base0;
}
static inline uint64_t rdtsc(void)
@@ -556,6 +559,11 @@ static inline uint64_t get_cr0(void)
return cr0;
}
+static inline void set_cr0(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory");
+}
+
static inline uint64_t get_cr3(void)
{
uint64_t cr3;
@@ -565,6 +573,11 @@ static inline uint64_t get_cr3(void)
return cr3;
}
+static inline void set_cr3(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory");
+}
+
static inline uint64_t get_cr4(void)
{
uint64_t cr4;
@@ -579,6 +592,19 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline uint64_t get_cr8(void)
+{
+ uint64_t cr8;
+
+ __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8));
+ return cr8;
+}
+
+static inline void set_cr8(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory");
+}
+
static inline void set_idt(const struct desc_ptr *idt_desc)
{
__asm__ __volatile__("lidt %0"::"m"(*idt_desc));
@@ -693,6 +719,11 @@ static inline bool this_cpu_is_amd(void)
return this_cpu_vendor_string_is("AuthenticAMD");
}
+static inline bool this_cpu_is_hygon(void)
+{
+ return this_cpu_vendor_string_is("HygonGenuine");
+}
+
static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
uint8_t reg, uint8_t lo, uint8_t hi)
{
@@ -1149,7 +1180,6 @@ do { \
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
struct ex_regs {
uint64_t rax, rcx, rdx, rbx;
@@ -1179,6 +1209,12 @@ struct idt_entry {
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+/*
+ * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
+ * used to signal "no expcetion".
+ */
+#define KVM_MAGIC_DE_VECTOR 0xff
+
/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
@@ -1314,6 +1350,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
bool kvm_is_tdp_enabled(void);
+static inline bool get_kvm_intel_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_intel", param);
+}
+
+static inline bool get_kvm_amd_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_amd", param);
+}
+
+static inline int get_kvm_intel_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_intel", param);
+}
+
+static inline int get_kvm_amd_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_amd", param);
+}
+
static inline bool kvm_is_pmu_enabled(void)
{
return get_kvm_param_bool("enable_pmu");
@@ -1324,9 +1380,22 @@ static inline bool kvm_is_forced_emulation_enabled(void)
return !!get_kvm_param_integer("force_emulation_prefix");
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+static inline bool kvm_is_unrestricted_guest_enabled(void)
+{
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+static inline bool kvm_is_ignore_msrs(void)
+{
+ return get_kvm_param_bool("ignore_msrs");
+}
+
+static inline bool kvm_is_lbrv_enabled(void)
+{
+ return !!get_kvm_amd_param_integer("lbrv");
+}
+
+uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr);
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
uint64_t a3);
@@ -1398,7 +1467,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
- PG_LEVEL_NUM
+ PG_LEVEL_256T
};
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
@@ -1408,10 +1477,52 @@ enum pg_level {
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
+#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
+#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
+#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
+#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
+#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
+#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx)
+#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
+#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
+#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set)
+
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte) \
+ (PTE_PRESENT_MASK(mmu) ? \
+ !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
+ !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
+#define is_executable_pte(mmu, pte) \
+ ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
+#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
+#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
+#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
+#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
+#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
+#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
+
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks);
+
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
+ uint64_t paddr, int level);
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t nr_bytes, int level);
+void vm_enable_tdp(struct kvm_vm *vm);
+bool kvm_cpu_has_tdp(void);
+void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
+void tdp_identity_map_default_memslots(struct kvm_vm *vm);
+void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size);
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);
+
/*
* Basic CPU control in CR0
*/
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..008b4169f5e2 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -25,19 +25,51 @@ enum sev_guest_state {
#define SEV_POLICY_NO_DBG (1UL << 0)
#define SEV_POLICY_ES (1UL << 2)
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
#define GHCB_MSR_TERM_REQ 0x100
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
* The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
* instead of a proper struct. The size of the parameter is embedded in the
* ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
@@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
void sev_vm_init(struct kvm_vm *vm);
void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
@@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
}
+static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t hva, uint64_t size, uint8_t type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h
new file mode 100644
index 000000000000..19337c34f13e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/smm.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef SELFTEST_KVM_SMM_H
+#define SELFTEST_KVM_SMM_H
+
+#include "kvm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t smram_gpa,
+ const void *smi_handler, size_t handler_size);
+
+void inject_smi(struct kvm_vcpu *vcpu);
+
+#endif /* SELFTEST_KVM_SMM_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 29cffd0a9181..c8539166270e 100644
--- a/tools/testing/selftests/kvm/include/x86/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -92,19 +92,18 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
u32 exit_int_info_err;
- u64 nested_ctl;
+ u64 misc_ctl;
u64 avic_vapic_bar;
u8 reserved_4[8];
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
- u64 virt_ext;
+ u64 misc_ctl2;
u32 clean;
u32 reserved_5;
u64 next_rip;
@@ -156,9 +155,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
@@ -176,8 +172,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
-#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+#define SVM_MISC_ENABLE_NP BIT(0)
+#define SVM_MISC_ENABLE_SEV BIT(1)
+
+#define SVM_MISC2_ENABLE_V_LBR BIT_ULL(0)
+#define SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE BIT_ULL(1)
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
index b74c6dcddcbd..5d7c42534bc4 100644
--- a/tools/testing/selftests/kvm/include/x86/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -27,6 +27,9 @@ struct svm_test_data {
void *msr; /* gva */
void *msr_hva;
uint64_t msr_gpa;
+
+ /* NPT */
+ uint64_t ncr3_gpa;
};
static inline void vmmcall(void)
@@ -57,6 +60,12 @@ struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+static inline bool kvm_cpu_has_npt(void)
+{
+ return kvm_cpu_has(X86_FEATURE_NPT);
+}
+void vm_enable_npt(struct kvm_vm *vm);
+
int open_sev_dev_path_or_exit(void);
#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index edb3c391b982..92b918700d24 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -520,13 +520,11 @@ struct vmx_pages {
uint64_t vmwrite_gpa;
void *vmwrite;
- void *eptp_hva;
- uint64_t eptp_gpa;
- void *eptp;
-
void *apic_access_hva;
uint64_t apic_access_gpa;
void *apic_access;
+
+ uint64_t eptp_gpa;
};
union vmx_basic {
@@ -559,17 +557,8 @@ bool load_vmcs(struct vmx_pages *vmx);
bool ept_1g_pages_supported(void);
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void vm_enable_ept(struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
new file mode 100644
index 000000000000..5d7590d01868
--- /dev/null
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY 0x20
+#define GSI_BASE_SECONDARY 0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+ int r, i;
+
+ /*
+ * The secondary task can encounter EBADF since the primary can close
+ * the eventfd at any time. And because the primary can recreate the
+ * eventfd, at the safe fd in the file table, the secondary can also
+ * encounter "unexpected" success, e.g. if the close+recreate happens
+ * between the first and second assignments. The secondary's role is
+ * mostly to antagonize KVM, not to detect bugs.
+ */
+ for (i = 0; i < 2; i++) {
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+ TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+ "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+ r, errno);
+
+ /* De-assign should succeed unless the eventfd was closed. */
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ TEST_ASSERT(!r || errno == EBADF,
+ "De-assign should succeed unless the fd was closed");
+ }
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+ while (!READ_ONCE(done)) {
+ juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+ juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+ }
+
+ return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+ int r1, r2;
+
+ /*
+ * At least one of the assigns should fail. KVM disallows assigning a
+ * single eventfd to multiple GSIs (or VMs), so it's possible that both
+ * assignments can fail, too.
+ */
+ r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+ TEST_ASSERT(!r1 || errno == EBUSY,
+ "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+ r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+ TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+ "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+ r1, r2, errno);
+
+ /*
+ * De-assign should always succeed, even if the corresponding assign
+ * failed.
+ */
+ kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t racing_thread;
+ struct kvm_vcpu *unused;
+ int r, i;
+
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
+
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ kvm_irqfd(vm1, 10, __eventfd, 0);
+
+ r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ /*
+ * De-assign all eventfds, along with multiple eventfds that were never
+ * assigned. KVM's ABI is that de-assign is allowed so long as the
+ * eventfd itself is valid.
+ */
+ kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+ close(__eventfd);
+
+ pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+ for (i = 0; i < 10000; i++) {
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ juggle_eventfd_primary(vm1, __eventfd);
+ juggle_eventfd_primary(vm2, __eventfd);
+ close(__eventfd);
+ }
+
+ WRITE_ONCE(done, true);
+ pthread_join(racing_thread, NULL);
+}
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index f02355c3c4c2..b7dbde9c0843 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -239,14 +239,14 @@ int main(int argc, char *argv[])
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
- stats_test(dup(vm_stats_fds));
+ stats_test(kvm_dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
- stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index dd8b12f626d3..c60a24a92829 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -261,9 +261,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
guest_page_size;
else
guest_test_phys_mem = p->phys_offset;
-#ifdef __s390x__
- alignment = max(0x100000UL, alignment);
-#endif
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
/* Set up the shared data structure test_args */
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..b023868fe0b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..b6a7e30c3eb1 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..50754a27f493 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
+static void gicv3_set_group(uint32_t intid, bool grp)
+{
+ uint32_t cpu_or_dist;
+ uint32_t val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
+ u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+ /* Disable Group-0 interrupts */
+ write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..7f9fdcf42ae6 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -15,6 +15,8 @@
#include "gic_v3.h"
#include "processor.h"
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
static u64 its_read_u64(unsigned long offset)
{
return readq_relaxed(GITS_BASE_GVA + offset);
@@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
}
+static u64 procnum_to_rdbase(u32 vcpu_id)
+{
+ return vcpu_id << GITS_COLLECTION_TARGET_SHIFT;
+}
+
#define GITS_CMDQ_POLL_ITERATIONS 0
static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
@@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
its_encode_cmd(&cmd, GITS_CMD_MAPC);
its_encode_collection(&cmd, collection_id);
- its_encode_target(&cmd, vcpu_id);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_encode_valid(&cmd, valid);
its_send_cmd(cmdq_base, &cmd);
@@ -246,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..43ea40edc533 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -12,6 +12,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
+#include "vgic.h"
#include <linux/bitfield.h>
#include <linux/sizes.h>
@@ -20,14 +21,9 @@
static vm_vaddr_t exception_handlers;
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
return (gva >> shift) & mask;
@@ -38,7 +34,7 @@ static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels == 4,
+ TEST_ASSERT(vm->mmu.pgtable_levels == 4,
"Mode %d does not have 4 page table levels", vm->mode);
return (gva >> shift) & mask;
@@ -49,7 +45,7 @@ static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels >= 3,
+ TEST_ASSERT(vm->mmu.pgtable_levels >= 3,
"Mode %d does not have >= 3 page table levels", vm->mode);
return (gva >> shift) & mask;
@@ -72,13 +68,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
uint64_t pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
@@ -90,12 +86,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
uint64_t pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
@@ -103,7 +99,7 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
return 1 << (vm->va_bits - shift);
}
@@ -114,21 +110,22 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t flags)
{
- uint8_t attr_idx = flags & 7;
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
uint64_t *ptep;
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -145,20 +142,23 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -167,7 +167,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -177,27 +181,33 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
uint64_t *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 0)
+ return ptep;
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 1)
+ break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 2)
+ break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@@ -215,6 +225,11 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return virt_get_pte_hva_at_level(vm, gva, 3);
+}
+
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep = virt_get_pte_hva(vm, gva);
@@ -243,13 +258,13 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- int level = 4 - (vm->pgtable_levels - 1);
+ int level = 4 - (vm->mmu.pgtable_levels - 1);
uint64_t pgd, *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -258,102 +273,128 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
+bool vm_supports_el2(struct kvm_vm *vm)
+{
+ const char *value = getenv("NV");
+
+ if (value && *value == '0')
+ return false;
+
+ return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic;
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init preferred = {};
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+ if (vm_supports_el2(vm))
+ preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ *init = preferred;
+}
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
- if (!init)
+ if (!init) {
+ kvm_get_default_vcpu_target(vm, &default_init);
init = &default_init;
-
- if (init->target == -1) {
- struct kvm_vcpu_init preferred;
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
- init->target = preferred.target;
}
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+ vcpu->init = *init;
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20);
- sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
- tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+ sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1));
/* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+ ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift);
/* Configure output size */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
- ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ tcr_el1 |= TCR_IPS_52_BITS;
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
+ tcr_el1 |= TCR_TBI1;
+ tcr_el1 |= TCR_EPD1_MASK;
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+
+ if (!vcpu_has_el2(vcpu))
+ return;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
+ HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H);
}
void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
@@ -387,7 +428,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
aarch64_vcpu_setup(vcpu, init);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size);
return vcpu;
}
@@ -457,7 +498,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
extern char vectors;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@@ -565,15 +606,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val);
*ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val);
*ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
ID_AA64MMFR0_EL1_TGRAN64_IMP);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val);
*ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
@@ -645,3 +686,44 @@ void wfi(void)
{
asm volatile("wfi");
}
+
+static bool request_mte;
+static bool request_vgic = true;
+
+void test_wants_mte(void)
+{
+ request_mte = true;
+}
+
+void test_disable_default_vgic(void)
+{
+ request_vgic = false;
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE))
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+
+ if (request_vgic && kvm_supports_vgic_v3()) {
+ vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64);
+ vm->arch.has_gic = true;
+ }
+}
+
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ __vgic_v3_init(vm->arch.gic_fd);
+}
+
+void kvm_arch_vm_release(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ close(vm->arch.gic_fd);
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
index 4427f43f73ea..d0f7bd0984b8 100644
--- a/tools/testing/selftests/kvm/lib/arm64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
@@ -15,6 +15,17 @@
#include "gic.h"
#include "gic_v3.h"
+bool kvm_supports_vgic_v3(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ kvm_vm_free(vm);
+
+ return !r;
+}
+
/*
* vGIC-v3 default host setup
*
@@ -30,24 +41,11 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
int gic_fd;
uint64_t attr;
- struct list_head *iter;
- unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
- TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
- /*
- * Make sure that the caller is infact calling this
- * function after all the vCPUs are added.
- */
- list_for_each(iter, &vm->vcpus)
- nr_vcpus_created++;
- TEST_ASSERT(nr_vcpus == nr_vcpus_created,
- "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
- nr_vcpus, nr_vcpus_created);
+ unsigned int nr_gic_pages;
/* Distributor setup */
gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
@@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
@@ -73,10 +68,39 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ return gic_fd;
+}
+
+void __vgic_v3_init(int fd)
+{
+ kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+}
- return gic_fd;
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+{
+ unsigned int nr_vcpus_created = 0;
+ struct list_head *iter;
+ int fd;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs);
+ if (fd < 0)
+ return fd;
+
+ __vgic_v3_init(fd);
+ return fd;
}
/* should only work for level sensitive interrupts */
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index b04901e55138..ce3099630397 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,7 +4,7 @@
*/
#include "guest_modes.h"
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv)
#include "processor.h"
enum vm_guest_mode vm_mode_default;
#endif
@@ -13,9 +13,11 @@ struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
-#ifndef __aarch64__
+#if !defined(__aarch64__) && !defined(__riscv)
guest_mode_append(VM_MODE_DEFAULT, true);
-#else
+#endif
+
+#ifdef __aarch64__
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
uint32_t ipa4k, ipa16k, ipa64k;
@@ -74,11 +76,36 @@ void guest_modes_append_default(void)
#ifdef __riscv
{
unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+ unsigned long satp_mode = riscv64_get_satp_mode() << SATP_MODE_SHIFT;
+ int i;
- if (sz >= 52)
- guest_mode_append(VM_MODE_P52V48_4K, true);
- if (sz >= 48)
- guest_mode_append(VM_MODE_P48V48_4K, true);
+ switch (sz) {
+ case 59:
+ guest_mode_append(VM_MODE_P56V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P56V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P56V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 50:
+ guest_mode_append(VM_MODE_P50V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P50V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P50V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 41:
+ guest_mode_append(VM_MODE_P41V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P41V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P41V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ default:
+ break;
+ }
+
+ /* set the first supported mode as default */
+ vm_mode_default = NUM_VM_MODES;
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES, "No supported mode!");
}
#endif
}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 279ad8946040..f5e076591c64 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -24,17 +24,29 @@ uint32_t guest_random_seed;
struct guest_random_state guest_rng;
static uint32_t last_guest_seed;
-static int vcpu_mmap_sz(void);
+static size_t vcpu_mmap_sz(void);
-int open_path_or_exit(const char *path, int flags)
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
int fd;
fd = open(path, flags);
- __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
- TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+ if (fd < 0)
+ goto error;
return fd;
+
+error:
+ if (errno == EACCES || errno == ENOENT)
+ ksft_exit_skip("- Cannot open '%s': %s. %s\n",
+ path, strerror(errno),
+ errno == EACCES ? "Root required?" : enoent_help);
+ TEST_FAIL("Failed to open '%s'", path);
+}
+
+int open_path_or_exit(const char *path, int flags)
+{
+ return __open_path_or_exit(path, flags, "");
}
/*
@@ -48,7 +60,7 @@ int open_path_or_exit(const char *path, int flags)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
- return open_path_or_exit(KVM_DEV_PATH, flags);
+ return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}
int open_kvm_dev_path_or_exit(void)
@@ -64,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param,
ssize_t bytes_read;
int fd, r;
+ /* Verify KVM is loaded, to provide a more helpful SKIP message. */
+ close(open_kvm_dev_path_or_exit());
+
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
module_name, param);
TEST_ASSERT(r < path_size,
@@ -80,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param,
return bytes_read;
}
-static int get_module_param_integer(const char *module_name, const char *param)
+int kvm_get_module_param_integer(const char *module_name, const char *param)
{
/*
* 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
@@ -104,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param)
return atoi_paranoid(value);
}
-static bool get_module_param_bool(const char *module_name, const char *param)
+bool kvm_get_module_param_bool(const char *module_name, const char *param)
{
char value;
ssize_t r;
@@ -120,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param)
TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-bool get_kvm_param_bool(const char *param)
-{
- return get_module_param_bool("kvm", param);
-}
-
-bool get_kvm_intel_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_intel", param);
-}
-
-bool get_kvm_amd_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_amd", param);
-}
-
-int get_kvm_param_integer(const char *param)
-{
- return get_module_param_integer("kvm", param);
-}
-
-int get_kvm_intel_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_intel", param);
-}
-
-int get_kvm_amd_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_amd", param);
-}
-
/*
* Capability
*
@@ -216,13 +201,23 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
- [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ [VM_MODE_P56V57_4K] = "PA-bits:56, VA-bits:57, 4K pages",
+ [VM_MODE_P56V48_4K] = "PA-bits:56, VA-bits:48, 4K pages",
+ [VM_MODE_P56V39_4K] = "PA-bits:56, VA-bits:39, 4K pages",
+ [VM_MODE_P50V57_4K] = "PA-bits:50, VA-bits:57, 4K pages",
+ [VM_MODE_P50V48_4K] = "PA-bits:50, VA-bits:48, 4K pages",
+ [VM_MODE_P50V39_4K] = "PA-bits:50, VA-bits:39, 4K pages",
+ [VM_MODE_P41V57_4K] = "PA-bits:41, VA-bits:57, 4K pages",
+ [VM_MODE_P41V48_4K] = "PA-bits:41, VA-bits:48, 4K pages",
+ [VM_MODE_P41V39_4K] = "PA-bits:41, VA-bits:39, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -242,13 +237,23 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
- [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
+ [VM_MODE_P56V57_4K] = { 56, 57, 0x1000, 12 },
+ [VM_MODE_P56V48_4K] = { 56, 48, 0x1000, 12 },
+ [VM_MODE_P56V39_4K] = { 56, 39, 0x1000, 12 },
+ [VM_MODE_P50V57_4K] = { 50, 57, 0x1000, 12 },
+ [VM_MODE_P50V48_4K] = { 50, 48, 0x1000, 12 },
+ [VM_MODE_P50V39_4K] = { 50, 39, 0x1000, 12 },
+ [VM_MODE_P41V57_4K] = { 41, 57, 0x1000, 12 },
+ [VM_MODE_P41V48_4K] = { 41, 48, 0x1000, 12 },
+ [VM_MODE_P41V39_4K] = { 41, 39, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -294,59 +299,77 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
/* Setup mode specific traits. */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P52V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P48V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P48V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
+ case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
kvm_init_vm_address_properties(vm);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this mode (48-bit virtual address space).
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->mmu.pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->mmu.pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
break;
case VM_MODE_P44V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ vm->mmu.pgtable_levels = 4;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ vm->mmu.pgtable_levels = 3;
break;
default:
TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
@@ -444,6 +467,15 @@ void kvm_set_files_rlimit(uint32_t nr_vcpus)
}
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
+#ifdef __x86_64__
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
+#endif
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -451,7 +483,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
nr_extra_pages);
struct userspace_mem_region *slot0;
struct kvm_vm *vm;
- int i;
+ int i, flags;
kvm_set_files_rlimit(nr_runnable_vcpus);
@@ -460,7 +492,15 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
vm = ____vm_create(shape);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
for (i = 0; i < NR_MEM_REGIONS; i++)
vm->memslots[i] = 0;
@@ -482,7 +522,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
return vm;
}
@@ -520,6 +560,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
for (i = 0; i < nr_vcpus; ++i)
vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -585,15 +626,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
return vm_vcpu_recreate(vm, 0);
}
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
- cpu_set_t mask;
- int r;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
- CPU_ZERO(&mask);
- CPU_SET(pcpu, &mask);
- r = sched_setaffinity(0, sizeof(mask), &mask);
- TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+ return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}
static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
@@ -647,7 +687,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
/* 2. Check if the main worker needs to be pinned. */
if (cpu) {
- kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
cpu = strtok(NULL, delim);
}
@@ -699,8 +739,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
static void kvm_stats_release(struct kvm_binary_stats *stats)
{
- int ret;
-
if (stats->fd < 0)
return;
@@ -709,8 +747,7 @@ static void kvm_stats_release(struct kvm_binary_stats *stats)
stats->desc = NULL;
}
- ret = close(stats->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(stats->fd);
stats->fd = -1;
}
@@ -733,20 +770,14 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
*/
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->run, vcpu_mmap_sz());
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
-
- ret = close(vcpu->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
+ kvm_close(vcpu->fd);
kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -758,38 +789,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
void kvm_vm_release(struct kvm_vm *vmp)
{
struct kvm_vcpu *vcpu, *tmp;
- int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
-
- ret = close(vmp->kvm_fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
/* Free cached stats metadata and close FD */
kvm_stats_release(&vmp->stats);
+
+ kvm_arch_vm_release(vmp);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
sparsebit_free(&region->protected_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_start, region->mmap_size);
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
- ret = munmap(region->mmap_alias, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_alias, region->mmap_size);
close(region->fd);
}
if (region->region.guest_memfd >= 0)
@@ -827,7 +852,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
int kvm_memfd_alloc(size_t size, bool hugepages)
{
int memfd_flags = MFD_CLOEXEC;
- int fd, r;
+ int fd;
if (hugepages)
memfd_flags |= MFD_HUGETLB;
@@ -835,11 +860,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- r = ftruncate(fd, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
-
- r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
return fd;
}
@@ -956,14 +978,14 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
+ uint64_t gpa, uint32_t slot, uint64_t npages, uint32_t flags,
+ int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
size_t mem_size = npages * vm->page_size;
- size_t alignment;
+ size_t alignment = 1;
TEST_REQUIRE_SET_USER_MEMORY_REGION2();
@@ -971,30 +993,29 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
@@ -1008,8 +1029,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
+ slot, gpa, npages, region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
}
@@ -1019,13 +1039,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
TEST_ASSERT(region != NULL, "Insufficient Memory");
region->mmap_size = mem_size;
-#ifdef __s390x__
- /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
- alignment = 0x100000;
-#else
- alignment = 1;
-#endif
-
/*
* When using THP mmap is not guaranteed to returned a hugepage aligned
* address so we have to pad the mmap. Padding is not needed for HugeTLB
@@ -1035,7 +1048,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -1046,12 +1059,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1086,8 +1096,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ guest_memfd = kvm_dup(guest_memfd);
}
region->region.guest_memfd = guest_memfd;
@@ -1099,20 +1108,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size,
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
region->region.guest_memfd);
/* Add to quick lookup data structures */
@@ -1122,12 +1129,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1136,10 +1141,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot,
- uint64_t npages, uint32_t flags)
+ uint64_t gpa, uint32_t slot, uint64_t npages,
+ uint32_t flags)
{
- vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -1205,6 +1210,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
/*
* VM Memory Region Move
*
@@ -1287,14 +1302,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
}
/* Returns the size of a vCPU's kvm_run structure. */
-static int vcpu_mmap_sz(void)
+static size_t vcpu_mmap_sz(void)
{
int dev_fd, ret;
dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
- TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
@@ -1335,12 +1350,10 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
- "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->run));
- vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->run != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
@@ -1364,7 +1377,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
* Output Args: None
*
* Return:
- * Lowest virtual address at or below vaddr_min, with at least
+ * Lowest virtual address at or above vaddr_min, with at least
* sz unused bytes. TEST_ASSERT failure if no area of at least
* size sz is available.
*
@@ -1462,8 +1475,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
virt_pg_map(vm, vaddr, paddr);
-
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
}
return vaddr_start;
@@ -1577,7 +1588,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
while (npages--) {
virt_pg_map(vm, vaddr, paddr);
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
vaddr += page_size;
paddr += page_size;
@@ -1696,7 +1706,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
- vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ int r;
+
+ /*
+ * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+ * split model (x86 only) if that fails (KVM x86 allows compiling out
+ * support for KVM_CREATE_IRQCHIP).
+ */
+ r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
vm->has_irqchip = true;
}
@@ -1776,9 +1797,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
- page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
vcpu->dirty_gfns = addr;
vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -1962,8 +1982,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
fprintf(stream, "%*spgd_created: %u\n", indent, "",
- vm->pgd_created);
- if (vm->pgd_created) {
+ vm->mmu.pgd_created);
+ if (vm->mmu.pgd_created) {
fprintf(stream, "%*sVirtual Translation Tables:\n",
indent + 2, "");
virt_dump(stream, vm, indent + 4);
@@ -2019,9 +2039,9 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -2286,7 +2306,15 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
TEST_FAIL("Unable to find stat '%s'", name);
}
-__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}
@@ -2294,11 +2322,35 @@ __weak void kvm_selftest_arch_init(void)
{
}
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+ case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+ TEST_FAIL("Unexpected signal %d\n", signum);
+ }
+}
+
void __attribute((constructor)) kvm_selftest_init(void)
{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
@@ -2319,3 +2371,8 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
pg = paddr >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..3f1e4b67c5ae
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..ee4ad3b1d2a4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include <asm/kvm.h>
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static vm_paddr_t invalid_pgtable[4];
+static vm_vaddr_t exception_handlers;
+
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ unsigned int shift;
+ uint64_t mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child)
+{
+ uint64_t *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ vm_paddr_t child, table;
+
+ if (vm->mmu.pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->mmu.pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->mmu.pgd = table;
+ vm->mmu.pgd_created = true;
+}
+
+static int virt_pte_none(uint64_t *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
+{
+ int level;
+ uint64_t *ptep;
+ vm_paddr_t child;
+
+ if (!vm->mmu.pgd_created)
+ goto unmapped_gva;
+
+ child = vm->mmu.pgd;
+ level = vm->mmu.pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint32_t prot_bits;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, vaddr, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, paddr | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+ uint64_t pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level;
+
+ if (!vm->mmu.pgd_created)
+ return;
+
+ level = vm->mmu.pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->mmu.pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ int vector;
+ unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, uint64_t);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
+static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t cfgid;
+
+ cfgid = KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, cfgid, val);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned int cfg;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ cfg = read_cpucfg(LOONGARCH_CPUCFG6);
+ loongarch_set_cpucfg(vcpu, LOONGARCH_CPUCFG6, cfg);
+
+ /* kernel mode and page enable mode */
+ val = PLV_KERN | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ /* time count start from 0 */
+ val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
+ width = vm->page_shift - 3;
+
+ switch (vm->mmu.pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_vaddr_alloc(vm, vm->page_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_vaddr + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (uint64_t)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..fc6cbb50573f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+ TEST_ASSERT(false, "found malformed line after 'memcg ...',"
+ "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * If lru_gen aging should force page table scanning.
+ *
+ * If you want to set this to false, you will need to do eviction
+ * before doing extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+ size_t sz;
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find which generation contains at least @pages pages, assuming that
+ * such a generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 313277486a1d..1ea735d66e15 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -196,10 +196,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
args->gpa = align_down(args->gpa, backing_src_pagesz);
-#ifdef __s390x__
- /* Align to 1M (segment size) */
- args->gpa = align_down(args->gpa, 1 << 20);
-#endif
args->size = guest_num_pages * args->guest_page_size;
pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
args->gpa, args->gpa + args->size);
@@ -265,7 +261,7 @@ static void *vcpu_thread_main(void *data)
int vcpu_idx = vcpu->vcpu_idx;
if (memstress_args.pin_vcpus)
- kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+ pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
WRITE_ONCE(vcpu->running, true);
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
index aa0abd3f35bb..b787b982e922 100644
--- a/tools/testing/selftests/kvm/lib/riscv/handlers.S
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -10,85 +10,88 @@
#include <asm/csr.h>
.macro save_context
- addi sp, sp, (-8*34)
- sd x1, 0(sp)
- sd x2, 8(sp)
- sd x3, 16(sp)
- sd x4, 24(sp)
- sd x5, 32(sp)
- sd x6, 40(sp)
- sd x7, 48(sp)
- sd x8, 56(sp)
- sd x9, 64(sp)
- sd x10, 72(sp)
- sd x11, 80(sp)
- sd x12, 88(sp)
- sd x13, 96(sp)
- sd x14, 104(sp)
- sd x15, 112(sp)
- sd x16, 120(sp)
- sd x17, 128(sp)
- sd x18, 136(sp)
- sd x19, 144(sp)
- sd x20, 152(sp)
- sd x21, 160(sp)
- sd x22, 168(sp)
- sd x23, 176(sp)
- sd x24, 184(sp)
- sd x25, 192(sp)
- sd x26, 200(sp)
- sd x27, 208(sp)
- sd x28, 216(sp)
- sd x29, 224(sp)
- sd x30, 232(sp)
- sd x31, 240(sp)
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
csrr s0, CSR_SEPC
csrr s1, CSR_SSTATUS
- csrr s2, CSR_SCAUSE
- sd s0, 248(sp)
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
sd s1, 256(sp)
sd s2, 264(sp)
+ sd s3, 272(sp)
.endm
.macro restore_context
+ ld s3, 272(sp)
ld s2, 264(sp)
ld s1, 256(sp)
- ld s0, 248(sp)
- csrw CSR_SCAUSE, s2
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
csrw CSR_SSTATUS, s1
csrw CSR_SEPC, s0
- ld x31, 240(sp)
- ld x30, 232(sp)
- ld x29, 224(sp)
- ld x28, 216(sp)
- ld x27, 208(sp)
- ld x26, 200(sp)
- ld x25, 192(sp)
- ld x24, 184(sp)
- ld x23, 176(sp)
- ld x22, 168(sp)
- ld x21, 160(sp)
- ld x20, 152(sp)
- ld x19, 144(sp)
- ld x18, 136(sp)
- ld x17, 128(sp)
- ld x16, 120(sp)
- ld x15, 112(sp)
- ld x14, 104(sp)
- ld x13, 96(sp)
- ld x12, 88(sp)
- ld x11, 80(sp)
- ld x10, 72(sp)
- ld x9, 64(sp)
- ld x8, 56(sp)
- ld x7, 48(sp)
- ld x6, 40(sp)
- ld x5, 32(sp)
- ld x4, 24(sp)
- ld x3, 16(sp)
- ld x2, 8(sp)
- ld x1, 0(sp)
- addi sp, sp, (8*34)
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
.endm
.balign 4
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index dd663bcf0cc0..067c6b2c15b0 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
@@ -26,11 +27,6 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
return !ret && !!value;
}
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
{
return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
@@ -60,7 +56,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
TEST_ASSERT(level > -1,
"Negative page table level (%d) not possible", level);
- TEST_ASSERT(level < vm->pgtable_levels,
+ TEST_ASSERT(level < vm->mmu.pgtable_levels,
"Invalid page table level (%d)", level);
return (gva & pte_index_mask[level]) >> pte_index_shift[level];
@@ -68,21 +64,21 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t *ptep, next_ppn;
- int level = vm->pgtable_levels - 1;
+ int level = vm->mmu.pgtable_levels - 1;
TEST_ASSERT((vaddr % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
@@ -98,7 +94,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, vaddr, level) * 8;
if (!*ptep) {
next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
@@ -126,12 +122,12 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
- int level = vm->pgtable_levels - 1;
+ int level = vm->mmu.pgtable_levels - 1;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8;
if (!ptep)
goto unmapped_gva;
level--;
@@ -176,13 +172,14 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- int level = vm->pgtable_levels - 1;
+ struct kvm_mmu *mmu = &vm->mmu;
+ int level = mmu->pgtable_levels - 1;
uint64_t pgd, *ptep;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -197,22 +194,41 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
{
struct kvm_vm *vm = vcpu->vm;
unsigned long satp;
+ unsigned long satp_mode;
+ unsigned long max_satp_mode;
/*
* The RISC-V Sv48 MMU mode supports 56-bit physical address
* for 48-bit virtual address with 4KB last level page size.
*/
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- case VM_MODE_P48V48_4K:
- case VM_MODE_P40V48_4K:
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ satp_mode = SATP_MODE_57;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ satp_mode = SATP_MODE_48;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ satp_mode = SATP_MODE_39;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
- satp |= SATP_MODE_48;
+ max_satp_mode = vcpu_get_reg(vcpu, RISCV_CONFIG_REG(satp_mode));
+
+ if ((satp_mode >> SATP_MODE_SHIFT) > max_satp_mode)
+ TEST_FAIL("Unable to set satp mode 0x%lx, max mode 0x%lx\n",
+ satp_mode >> SATP_MODE_SHIFT, max_satp_mode);
+
+ satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= satp_mode;
vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
}
@@ -402,7 +418,7 @@ struct handlers {
exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
-void route_exception(struct ex_regs *regs)
+void route_exception(struct pt_regs *regs)
{
struct handlers *handlers = (struct handlers *)exception_handlers;
int vector = 0, ec;
@@ -515,3 +531,43 @@ unsigned long get_host_sbi_spec_version(void)
return ret.value;
}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * riscv64 doesn't have a true default mode, so start by detecting the
+ * supported vm mode.
+ */
+ guest_modes_append_default();
+}
+
+unsigned long riscv64_get_satp_mode(void)
+{
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ struct kvm_one_reg reg = {
+ .id = RISCV_CONFIG_REG(satp_mode),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+
+ return val;
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return kvm_check_cap(KVM_CAP_IRQCHIP);
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..6a9a660413a7 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -17,7 +17,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
@@ -25,8 +25,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
- vm->pgd = paddr;
- vm->pgd_created = true;
+ vm->mmu.pgd = paddr;
+ vm->mmu.pgd_created = true;
}
/*
@@ -70,7 +70,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
gva, vm->max_gfn, vm->page_size);
/* Walk through region and segment tables */
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
@@ -94,7 +94,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
@@ -149,10 +149,10 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- virt_dump_region(stream, vm, indent, vm->pgd);
+ virt_dump_region(stream, vm, indent, vm->mmu.pgd);
}
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
@@ -184,7 +184,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
- sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
+ sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */
vcpu_sregs_set(vcpu, &sregs);
vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
@@ -221,3 +221,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index cfed9d26cc71..a99188f87a38 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -116,7 +116,7 @@
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*
@@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*/
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8ed0b74ae837..8a1848586a85 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,6 +18,13 @@
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
/*
* Random number generator that is usable from guest code. This is the
* Park-Miller LCG using standard constants.
@@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-bool thp_configured(void)
+static bool test_sysfs_path(const char *path)
{
- int ret;
struct stat statbuf;
+ int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ ret = stat(path, &statbuf);
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "Error in stating /sys/kernel/mm/transparent_hugepage");
+ "Error in stat()ing '%s'", path);
return ret == 0;
}
-size_t get_trans_hugepagesz(void)
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
{
size_t size;
FILE *f;
int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
-
- f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
- TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
ret = fscanf(f, "%ld", &size);
- TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
- fclose(f);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f);
return size;
}
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
size_t get_def_hugetlb_pagesz(void)
{
char buf[64];
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 7f5d62a65c68..f53414ba7103 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "memstress.h"
#include "processor.h"
+#include "svm_util.h"
#include "vmx.h"
void memstress_l2_guest_code(uint64_t vcpu_id)
@@ -29,9 +30,10 @@ __asm__(
" ud2;"
);
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
#define L2_GUEST_STACK_SIZE 64
+
+static void l1_vmx_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
unsigned long *rsp;
@@ -45,10 +47,34 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
GUEST_DONE();
}
+static void l1_svm_code(struct svm_test_data *svm, uint64_t vcpu_id)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ generic_svm_setup(svm, memstress_l2_guest_entry, rsp);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+
+static void memstress_l1_guest_code(void *data, uint64_t vcpu_id)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data, vcpu_id);
+ else
+ l1_svm_code(data, vcpu_id);
+}
+
uint64_t memstress_nested_pages(int nr_vcpus)
{
/*
@@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus)
return 513 + 10 * nr_vcpus;
}
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+static void memstress_setup_ept_mappings(struct kvm_vm *vm)
{
uint64_t start, end;
- prepare_eptp(vmx, vm, 0);
-
/*
* Identity map the first 4G and the test region with 1G pages so that
* KVM can shadow the EPT12 with the maximum huge page size supported
* by the backing source.
*/
- nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+ tdp_identity_map_1g(vm, 0, 0x100000000ULL);
start = align_down(memstress_args.gpa, PG_SIZE_1G);
end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
- nested_identity_map_1g(vmx, vm, start, end - start);
+ tdp_identity_map_1g(vm, start, end - start);
}
void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
{
- struct vmx_pages *vmx, *vmx0 = NULL;
struct kvm_regs regs;
- vm_vaddr_t vmx_gva;
+ vm_vaddr_t nested_gva;
int vcpu_id;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
+ TEST_REQUIRE(kvm_cpu_has_tdp());
+ vm_enable_tdp(vm);
+ memstress_setup_ept_mappings(vm);
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
- if (vcpu_id == 0) {
- memstress_setup_ept(vmx, vm);
- vmx0 = vmx;
- } else {
- /* Share the same EPT table across all vCPUs. */
- vmx->eptp = vmx0->eptp;
- vmx->eptp_hva = vmx0->eptp_hva;
- vmx->eptp_gpa = vmx0->eptp_gpa;
- }
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
/*
* Override the vCPU to run memstress_l1_guest_code() which will
@@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
vcpu_regs_get(vcpus[vcpu_id], &regs);
regs.rip = (unsigned long) memstress_l1_guest_code;
vcpu_regs_set(vcpus[vcpu_id], &regs);
- vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
index f31f0427c17c..34cb57d1d671 100644
--- a/tools/testing/selftests/kvm/lib/x86/pmu.c
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include "kvm_util.h"
+#include "processor.h"
#include "pmu.h"
const uint64_t intel_pmu_arch_events[] = {
@@ -19,6 +20,11 @@ const uint64_t intel_pmu_arch_events[] = {
INTEL_ARCH_BRANCHES_RETIRED,
INTEL_ARCH_BRANCHES_MISPREDICTED,
INTEL_ARCH_TOPDOWN_SLOTS,
+ INTEL_ARCH_TOPDOWN_BE_BOUND,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC,
+ INTEL_ARCH_TOPDOWN_FE_BOUND,
+ INTEL_ARCH_TOPDOWN_RETIRING,
+ INTEL_ARCH_LBR_INSERTS,
};
kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
@@ -29,3 +35,46 @@ const uint64_t amd_pmu_zen_events[] = {
AMD_ZEN_BRANCHES_MISPREDICTED,
};
kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+
+/*
+ * For Intel Atom CPUs, the PMU events "Instruction Retired" or
+ * "Branch Instruction Retired" may be overcounted for some certain
+ * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
+ * and complex SGX/SMX/CSTATE instructions/flows.
+ *
+ * The detailed information can be found in the errata (section SRF7):
+ * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
+ *
+ * For the Atom platforms before Sierra Forest (including Sierra Forest),
+ * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
+ * be overcounted on these certain instructions, but for Clearwater Forest
+ * only "Instruction Retired" event is overcounted on these instructions.
+ */
+static uint64_t get_pmu_errata(void)
+{
+ if (!this_cpu_is_intel())
+ return 0;
+
+ if (this_cpu_family() != 0x6)
+ return 0;
+
+ switch (this_cpu_model()) {
+ case 0xDD: /* Clearwater Forest */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
+ case 0xAF: /* Sierra Forest */
+ case 0x4D: /* Avaton, Rangely */
+ case 0x5F: /* Denverton */
+ case 0x86: /* Jacobsville */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
+ BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
+ default:
+ return 0;
+ }
+}
+
+uint64_t pmu_errata_mask;
+
+void kvm_init_pmu_errata(void)
+{
+ pmu_errata_mask = get_pmu_errata();
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bd5a802fa7a5..01f0f97d4430 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -6,8 +6,12 @@
#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
+#include "pmu.h"
#include "processor.h"
+#include "smm.h"
+#include "svm_util.h"
#include "sev.h"
+#include "vmx.h"
#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
@@ -20,9 +24,44 @@
vm_vaddr_t exception_handlers;
bool host_cpu_is_amd;
bool host_cpu_is_intel;
+bool host_cpu_is_hygon;
+bool host_cpu_is_amd_compatible;
bool is_forced_emulation_enabled;
uint64_t guest_tsc_khz;
+const char *ex_str(int vector)
+{
+ switch (vector) {
+#define VEC_STR(v) case v##_VECTOR: return "#" #v
+ case DE_VECTOR: return "no exception";
+ case KVM_MAGIC_DE_VECTOR: return "#DE";
+ VEC_STR(DB);
+ VEC_STR(NMI);
+ VEC_STR(BP);
+ VEC_STR(OF);
+ VEC_STR(BR);
+ VEC_STR(UD);
+ VEC_STR(NM);
+ VEC_STR(DF);
+ VEC_STR(TS);
+ VEC_STR(NP);
+ VEC_STR(SS);
+ VEC_STR(GP);
+ VEC_STR(PF);
+ VEC_STR(MF);
+ VEC_STR(AC);
+ VEC_STR(MC);
+ VEC_STR(XM);
+ VEC_STR(VE);
+ VEC_STR(CP);
+ VEC_STR(HV);
+ VEC_STR(VC);
+ VEC_STR(SX);
+ default: return "#??";
+#undef VEC_STR
+ }
+}
+
static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
@@ -122,26 +161,59 @@ bool kvm_is_tdp_enabled(void)
return get_kvm_amd_param_bool("npt");
}
+static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ struct pte_masks *pte_masks)
+{
+ /* If needed, create the top-level page table. */
+ if (!mmu->pgd_created) {
+ mmu->pgd = vm_alloc_page_table(vm);
+ mmu->pgd_created = true;
+ mmu->arch.pte_masks = *pte_masks;
+ }
+
+ TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+ "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+ mmu->pgtable_levels);
+}
+
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+
+ struct pte_masks pte_masks = (struct pte_masks){
+ .present = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .user = BIT_ULL(2),
+ .accessed = BIT_ULL(5),
+ .dirty = BIT_ULL(6),
+ .huge = BIT_ULL(7),
+ .nx = BIT_ULL(63),
+ .executable = 0,
+ .c = vm->arch.c_bit,
+ .s = vm->arch.s_bit,
+ };
+
+ virt_mmu_init(vm, &vm->mmu, &pte_masks);
+}
+
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks)
+{
+ TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
- /* If needed, create page map l4 table. */
- if (!vm->pgd_created) {
- vm->pgd = vm_alloc_page_table(vm);
- vm->pgd_created = true;
- }
+ vm->stage2_mmu.pgtable_levels = pgtable_levels;
+ virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
}
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
- uint64_t vaddr, int level)
+static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ uint64_t *parent_pte, uint64_t vaddr, int level)
{
uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte),
"Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
level + 1, vaddr);
@@ -149,20 +221,23 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
}
static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
uint64_t *parent_pte,
uint64_t vaddr,
uint64_t paddr,
int current_level,
int target_level)
{
- uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+ uint64_t *pte = virt_get_pte(vm, mmu, parent_pte, vaddr, current_level);
paddr = vm_untag_gpa(vm, paddr);
- if (!(*pte & PTE_PRESENT_MASK)) {
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (!is_present_pte(mmu, pte)) {
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu);
if (current_level == target_level)
- *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ *pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
@@ -174,21 +249,22 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx",
current_level, vaddr);
- TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ TEST_ASSERT(!is_huge_pte(mmu, pte),
"Cannot create page table at level: %u, vaddr: 0x%lx",
current_level, vaddr);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
+ uint64_t paddr, int level)
{
const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
+ uint64_t *pte = &mmu->pgd;
+ int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT((vaddr % pg_size) == 0,
"Virtual address not aligned,\n"
@@ -205,41 +281,43 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
"Unexpected bits in paddr: %lx", paddr);
+ TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+ "X and NX bit masks cannot be used simultaneously");
+
/*
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
+ for (current_level = mmu->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, mmu, pte, vaddr, paddr,
+ current_level, level);
+ if (is_huge_pte(mmu, pte))
+ return;
+ }
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
- TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!is_present_pte(mmu, pte),
"PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
/*
* Neither SEV nor TDX supports shared page tables, so only the final
* leaf PTE needs manually set the C/S-bit.
*/
if (vm_is_gpa_protected(vm, paddr))
- *pte |= vm->arch.c_bit;
+ *pte |= PTE_C_BIT_MASK(mmu);
else
- *pte |= vm->arch.s_bit;
+ *pte |= PTE_S_BIT_MASK(mmu);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+ __virt_pg_map(vm, &vm->mmu, vaddr, paddr, PG_LEVEL_4K);
}
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -254,16 +332,19 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
nr_bytes, pg_size);
for (i = 0; i < nr_pages; i++) {
- __virt_pg_map(vm, vaddr, paddr, level);
+ __virt_pg_map(vm, &vm->mmu, vaddr, paddr, level);
+ sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift,
+ nr_bytes / PAGE_SIZE);
vaddr += pg_size;
paddr += pg_size;
}
}
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+static bool vm_is_target_pte(struct kvm_mmu *mmu, uint64_t *pte,
+ int *level, int current_level)
{
- if (*pte & PTE_LARGE_MASK) {
+ if (is_huge_pte(mmu, pte)) {
TEST_ASSERT(*level == PG_LEVEL_NONE ||
*level == current_level,
"Unexpected hugepage at level %d", current_level);
@@ -273,60 +354,68 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
return *level == current_level;
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level)
+static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
+ uint64_t vaddr,
+ int *level)
{
- uint64_t *pml4e, *pdpe, *pde;
+ int va_width = 12 + (mmu->pgtable_levels) * 9;
+ uint64_t *pte = &mmu->pgd;
+ int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
/*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
+ * Check that the vaddr is a sign-extended va_width value.
*/
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
+ TEST_ASSERT(vaddr ==
+ (((int64_t)vaddr << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = mmu->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, mmu, pte, vaddr, current_level);
+ if (vm_is_target_pte(mmu, pte, level, current_level))
+ return pte;
+ }
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
+ return virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
+}
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
+uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa)
+{
+ int level = PG_LEVEL_4K;
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level);
}
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr)
{
int level = PG_LEVEL_4K;
- return __vm_get_page_table_entry(vm, vaddr, &level);
+ return __vm_get_page_table_entry(vm, &vm->mmu, vaddr, &level);
}
void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ struct kvm_mmu *mmu = &vm->mmu;
uint64_t *pml4e, *pml4e_start;
uint64_t *pdpe, *pdpe_start;
uint64_t *pde, *pde_start;
uint64_t *pte, *pte_start;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
fprintf(stream, "%*s "
@@ -334,47 +423,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, mmu->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!(*pml4e & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pml4e))
continue;
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
- !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+ is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e));
pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!(*pdpe & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pdpe))
continue;
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
- !!(*pdpe & PTE_NX_MASK));
+ PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe),
+ is_nx_pte(mmu, pdpe));
pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!(*pde & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pde))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
- !!(*pde & PTE_NX_MASK));
+ PTE_GET_PFN(*pde), is_writable_pte(mmu, pde),
+ is_nx_pte(mmu, pde));
pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!(*pte & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pte))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u "
@@ -383,9 +472,9 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
PTE_GET_PFN(*pte),
- !!(*pte & PTE_WRITABLE_MASK),
- !!(*pte & PTE_NX_MASK),
- !!(*pte & PTE_DIRTY_MASK),
+ is_writable_pte(mmu, pte),
+ is_nx_pte(mmu, pte),
+ is_dirty_pte(mmu, pte),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -396,6 +485,72 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
+void vm_enable_tdp(struct kvm_vm *vm)
+{
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vm_enable_ept(vm);
+ else
+ vm_enable_npt(vm);
+}
+
+bool kvm_cpu_has_tdp(void)
+{
+ return kvm_cpu_has_ept() || kvm_cpu_has_npt();
+}
+
+void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
+ uint64_t size, int level)
+{
+ size_t page_size = PG_LEVEL_SIZE(level);
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ __virt_pg_map(vm, &vm->stage2_mmu, nested_paddr, paddr, level);
+ nested_paddr += page_size;
+ paddr += page_size;
+ }
+}
+
+void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
+ uint64_t size)
+{
+ __tdp_map(vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
+/* Prepare an identity extended page table that maps all the
+ * physical pages in VM.
+ */
+void tdp_identity_map_default_memslots(struct kvm_vm *vm)
+{
+ uint32_t s, memslot = 0;
+ sparsebit_idx_t i, last;
+ struct userspace_mem_region *region = memslot2region(vm, memslot);
+
+ /* Only memslot 0 is mapped here, ensure it's the only one being used */
+ for (s = 0; s < NR_MEM_REGIONS; s++)
+ TEST_ASSERT_EQ(vm->memslots[s], 0);
+
+ i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+ last = i + (region->region.memory_size >> vm->page_shift);
+ for (;;) {
+ i = sparsebit_next_clear(region->unused_phy_pages, i);
+ if (i > last)
+ break;
+
+ tdp_map(vm, (uint64_t)i << vm->page_shift,
+ (uint64_t)i << vm->page_shift, 1 << vm->page_shift);
+ }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size)
+{
+ __tdp_map(vm, addr, addr, size, PG_LEVEL_1G);
+}
+
/*
* Set Unusable Segment
*
@@ -466,9 +621,9 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int level = PG_LEVEL_NONE;
- uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+ uint64_t *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level);
- TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ TEST_ASSERT(is_present_pte(&vm->mmu, pte),
"Leaf PTE not PRESENT for gva: 0x%08lx", gva);
/*
@@ -492,7 +647,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
@@ -506,6 +662,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->mmu.pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
@@ -515,7 +673,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_seg_set_kernel_data_64bit(&sregs.gs);
kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
- sregs.cr3 = vm->pgd;
+ sregs.cr3 = vm->mmu.pgd;
vcpu_sregs_set(vcpu, &sregs);
}
@@ -557,7 +715,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
return false;
if (regs->vector == DE_VECTOR)
- return false;
+ regs->vector = KVM_MAGIC_DE_VECTOR;
regs->rip = regs->r11;
regs->r9 = regs->vector;
@@ -625,7 +783,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
}
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
int r;
@@ -637,9 +795,12 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
sync_global_to_guest(vm, host_cpu_is_intel);
sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, host_cpu_is_hygon);
+ sync_global_to_guest(vm, host_cpu_is_amd_compatible);
sync_global_to_guest(vm, is_forced_emulation_enabled);
+ sync_global_to_guest(vm, pmu_errata_mask);
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
struct kvm_sev_init init = { 0 };
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
@@ -1156,7 +1317,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
@@ -1192,7 +1353,8 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
"1: vmmcall\n\t" \
"2:" \
: "=a"(r) \
- : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ : [use_vmmcall] "r" (host_cpu_is_amd_compatible), \
+ inputs); \
\
r; \
})
@@ -1232,8 +1394,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
- /* Avoid reserved HyperTransport region on AMD processors. */
- if (!host_cpu_is_amd)
+ /* Avoid reserved HyperTransport region on AMD or Hygon processors. */
+ if (!host_cpu_is_amd_compatible)
return max_gfn;
/* On parts with <40 physical address bits, the area is fully hidden */
@@ -1247,7 +1409,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
/*
* Otherwise it's at the top of the physical address space, possibly
- * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * reduced due to SME or CSV by bits 11:6 of CPUID[0x8000001f].EBX. Use
* the old conservative value if MAXPHYADDR is not enumerated.
*/
if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
@@ -1264,21 +1426,15 @@ done:
return min(max_gfn, ht_gfn - 1);
}
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- /* Ensure that a KVM vendor-specific module is loaded. */
- if (vm == NULL)
- close(open_kvm_dev_path_or_exit());
-
- return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
+ host_cpu_is_hygon = this_cpu_is_hygon();
+ host_cpu_is_amd_compatible = host_cpu_is_amd || host_cpu_is_hygon;
is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ kvm_init_pmu_errata();
}
bool sys_clocksource_is_based_on_tsc(void)
@@ -1291,3 +1447,33 @@ bool sys_clocksource_is_based_on_tsc(void)
return ret;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ uint64_t smram_gpa,
+ const void *smi_handler, size_t handler_size)
+{
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa,
+ SMRAM_MEMSLOT) == smram_gpa,
+ "Could not allocate guest physical addresses for SMRAM");
+
+ memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+ vcpu_events_set(vcpu, &events);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..c3a9838f4806 100644
--- a/tools/testing/selftests/kvm/lib/x86/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -14,7 +14,8 @@
* and find the first range, but that's correct because the condition
* expression would cause us to quit the loop.
*/
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ uint8_t page_type, bool private)
{
const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
const vm_paddr_t gpa_base = region->region.guest_phys_addr;
@@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
if (!sparsebit_any_set(protected_phy_pages))
return;
- sev_register_encrypted_memory(vm, region);
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
sparsebit_for_each_set_range(protected_phy_pages, i, j) {
const uint64_t size = (j - i + 1) * vm->page_size;
const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
- sev_launch_update_data(vm, gpa_base + offset, size);
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (uint64_t)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
+
}
}
void sev_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
@@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm)
void sev_es_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
if (policy & SEV_POLICY_ES)
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
@@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
return vm;
}
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement)
{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
sev_vm_launch(vm, policy);
if (!measurement)
diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index d239c2097391..eb20b00112c7 100644
--- a/tools/testing/selftests/kvm/lib/x86/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -46,6 +46,9 @@ vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
memset(svm->msr_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ svm->ncr3_gpa = vm->stage2_mmu.pgd;
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -59,6 +62,25 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}
+void vm_enable_npt(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT");
+
+ /*
+ * NPTs use the same PTE format, but deliberately drop the C-bit as the
+ * per-VM shared vs. private information is only meant for stage-1.
+ */
+ pte_masks = vm->mmu.arch.pte_masks;
+ pte_masks.c = 0;
+
+ /* NPT walks are treated as user accesses, so set the 'user' bit. */
+ pte_masks.always_set = pte_masks.user;
+
+ tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks);
+}
+
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
{
struct vmcb *vmcb = svm->vmcb;
@@ -102,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
guest_regs.rdi = (u64)svm;
+
+ if (svm->ncr3_gpa) {
+ ctrl->misc_ctl |= SVM_MISC_ENABLE_NP;
+ ctrl->nested_cr3 = svm->ncr3_gpa;
+ }
}
/*
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d4d1208dd023..c87b340362a9 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -10,38 +10,21 @@
#include "processor.h"
#include "vmx.h"
-#define PAGE_SHIFT_4K 12
-
#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */
+#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */
+#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */
+
+#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT)
+#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */
+#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT)
+
bool enable_evmcs;
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
- uint64_t memory_type:3;
- uint64_t page_walk_length:3;
- uint64_t ad_enabled:1;
- uint64_t reserved_11_07:5;
- uint64_t address:40;
- uint64_t reserved_63_52:12;
-};
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
@@ -58,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
return evmcs_ver;
}
+void vm_enable_ept(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ /*
+ * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
+ * 'readable' bit.
+ */
+ pte_masks = (struct pte_masks) {
+ .present = 0,
+ .user = 0,
+ .readable = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .executable = BIT_ULL(2),
+ .huge = BIT_ULL(7),
+ .accessed = BIT_ULL(8),
+ .dirty = BIT_ULL(9),
+ .nx = 0,
+ };
+
+ /* TODO: Add support for 5-level EPT. */
+ tdp_mmu_init(vm, 4, &pte_masks);
+}
+
/* Allocate memory regions for nested VMX tests.
*
* Input Args:
@@ -107,6 +116,9 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ vmx->eptp_gpa = vm->stage2_mmu.pgd;
+
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -196,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
if (vmx->eptp_gpa) {
- uint64_t ept_paddr;
- struct eptPageTablePointer eptp = {
- .memory_type = X86_MEMTYPE_WB,
- .page_walk_length = 3, /* + 1 */
- .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
- .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
- };
-
- memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
- vmwrite(EPT_POINTER, ept_paddr);
+ uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4;
+
+ TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0,
+ "Illegal bits set in vmx->eptp_gpa");
+
+ if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS))
+ eptp |= EPTP_AD_ENABLED;
+
+ vmwrite(EPT_POINTER, eptp);
sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
}
@@ -362,170 +373,13 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-static void nested_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-
- /*
- * For now mark these as accessed and dirty because the only
- * testcase we have needs that. Can be reconsidered later.
- */
- pte->accessed = true;
- pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
-{
- __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- * vm - Virtual Machine
- * nested_paddr - Nested guest physical address to map
- * paddr - VM Physical Address
- * size - The size of the range to map
- * level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- int level)
-{
- size_t page_size = PG_LEVEL_SIZE(level);
- size_t npages = size / page_size;
-
- TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
- TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
- while (npages--) {
- __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
- nested_paddr += page_size;
- paddr += page_size;
- }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
- __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot)
-{
- sparsebit_idx_t i, last;
- struct userspace_mem_region *region =
- memslot2region(vm, memslot);
-
- i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
- last = i + (region->region.memory_size >> vm->page_shift);
- for (;;) {
- i = sparsebit_next_clear(region->unused_phy_pages, i);
- if (i > last)
- break;
-
- nested_map(vmx, vm,
- (uint64_t)i << vm->page_shift,
- (uint64_t)i << vm->page_shift,
- 1 << vm->page_shift);
- }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size)
-{
- __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
bool kvm_cpu_has_ept(void)
{
uint64_t ctrl;
+ if (!kvm_cpu_has(X86_FEATURE_VMX))
+ return false;
+
ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
return false;
@@ -534,16 +388,6 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
-{
- TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
- vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
- vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
- vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
-
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..355ecac30954
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates periodic/one-shot constant timer IRQ using
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+ /* Make sure pending timer IRQ arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ uint32_t cpu = guest_get_vcpuid();
+ uint64_t xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1
+ * On virtual machine, its value counts down from BIT_ULL(48) - 1
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+ " Guest period timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(uint32_t cpu)
+{
+ uint32_t irq_iter, config_iter;
+ uint64_t us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(uint32_t cpu)
+{
+ uint32_t config_iter;
+ uint64_t xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(uint32_t cpu)
+{
+ uint32_t config_iter;
+ unsigned long start, end, prev, us;
+
+ /* Assuming that test case starts to run in 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+ /* test time count growing up always */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ /* must run at first */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/loongarch/pmu_test.c b/tools/testing/selftests/kvm/loongarch/pmu_test.c
new file mode 100644
index 000000000000..88bb530e336e
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/pmu_test.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LoongArch KVM PMU event counting test
+ *
+ * Test hardware event counting: CPU_CYCLES, INSTR_RETIRED,
+ * BRANCH_INSTRUCTIONS and BRANCH_MISSES.
+ */
+#include <linux/bitops.h>
+#include "kvm_util.h"
+#include "pmu.h"
+#include "loongarch/processor.h"
+
+static int pmu_irq_count;
+
+/* Check PMU support */
+static bool has_pmu_support(void)
+{
+ uint32_t cfg6;
+
+ /* Read CPUCFG6 to check PMU */
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+
+ /* Check PMU present bit */
+ if (!(cfg6 & CPUCFG6_PMP))
+ return false;
+
+ /* Check that at least one counter exists */
+ if (((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) == 0)
+ return false;
+
+ return true;
+}
+
+/* Dump PMU capabilities */
+static void dump_pmu_caps(void)
+{
+ uint32_t cfg6;
+ int nr_counters, counter_bits;
+
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+ nr_counters = ((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1;
+ counter_bits = ((cfg6 & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1;
+
+ pr_info("PMU capabilities:\n");
+ pr_info(" Counters present: %s\n", cfg6 & CPUCFG6_PMP ? "yes" : "no");
+ pr_info(" Number of counters: %d\n", nr_counters);
+ pr_info(" Counter width: %d bits\n", counter_bits);
+}
+
+/* Guest test code - runs inside VM */
+static void guest_pmu_base_test(void)
+{
+ int i;
+ uint32_t cfg6, pmnum;
+ uint64_t cnt[4];
+
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+ pmnum = (cfg6 >> 4) & 0xf;
+ GUEST_PRINTF("CPUCFG6 = 0x%x\n", cfg6);
+ GUEST_PRINTF("PMP enabled: %s\n", (cfg6 & 0x1) ? "YES" : "NO");
+ GUEST_PRINTF("Number of counters (PMNUM): %x\n", pmnum + 1);
+ GUEST_ASSERT(pmnum == 3);
+
+ GUEST_PRINTF("Clean csr_perfcntr0-3\n");
+ csr_write(0, LOONGARCH_CSR_PERFCNTR0);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR1);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR2);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR3);
+ GUEST_PRINTF("Set csr_perfctrl0 for cycles event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0);
+ GUEST_PRINTF("Set csr_perfctrl1 for instr_retired event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ LOONGARCH_PMU_EVENT_INSTR_RETIRED, LOONGARCH_CSR_PERFCTRL1);
+ GUEST_PRINTF("Set csr_perfctrl2 for branch_instructions event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LOONGARCH_CSR_PERFCTRL2);
+ GUEST_PRINTF("Set csr_perfctrl3 for branch_misses event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ PERF_COUNT_HW_BRANCH_MISSES, LOONGARCH_CSR_PERFCTRL3);
+
+ for (i = 0; i < NUM_LOOPS; i++)
+ cpu_relax();
+
+ cnt[0] = csr_read(LOONGARCH_CSR_PERFCNTR0);
+ GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt[0]);
+ cnt[1] = csr_read(LOONGARCH_CSR_PERFCNTR1);
+ GUEST_PRINTF("csr_perfcntr1 is %lx\n", cnt[1]);
+ cnt[2] = csr_read(LOONGARCH_CSR_PERFCNTR2);
+ GUEST_PRINTF("csr_perfcntr2 is %lx\n", cnt[2]);
+ cnt[3] = csr_read(LOONGARCH_CSR_PERFCNTR3);
+ GUEST_PRINTF("csr_perfcntr3 is %lx\n", cnt[3]);
+
+ GUEST_PRINTF("assert csr_perfcntr0 >EXPECTED_CYCLES_MIN && csr_perfcntr0 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[0] > EXPECTED_CYCLES_MIN && cnt[0] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr1 > EXPECTED_INSTR_MIN && csr_perfcntr1 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[1] > EXPECTED_INSTR_MIN && cnt[1] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr2 > 0 && csr_perfcntr2 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[2] > 0 && cnt[2] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr3 > 0 && csr_perfcntr3 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[3] > 0 && cnt[3] < UPPER_BOUND);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+
+ pmu_irq_disable();
+ intid = !!(regs->estat & BIT(INT_PMI));
+ GUEST_ASSERT_EQ(intid, 1);
+ GUEST_PRINTF("Get PMU interrupt\n");
+ WRITE_ONCE(pmu_irq_count, pmu_irq_count + 1);
+}
+
+static void guest_pmu_interrupt_test(void)
+{
+ uint64_t cnt;
+
+ csr_write(PMU_OVERFLOW - 1, LOONGARCH_CSR_PERFCNTR0);
+ csr_write(PMU_ENVENT_ENABLED | CSR_PERFCTRL_PMIE | LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0);
+
+ cpu_relax();
+
+ GUEST_ASSERT_EQ(pmu_irq_count, 1);
+ cnt = csr_read(LOONGARCH_CSR_PERFCNTR0);
+ GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt);
+ GUEST_PRINTF("PMU interrupt test success\n");
+
+}
+
+static void guest_code(void)
+{
+ guest_pmu_base_test();
+
+ pmu_irq_enable();
+ local_irq_enable();
+ guest_pmu_interrupt_test();
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ struct kvm_device_attr attr;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ /* Check host KVM PMU support */
+ if (!has_pmu_support()) {
+ print_skip("PMU not supported by host hardware\n");
+ dump_pmu_caps();
+ return KSFT_SKIP;
+ }
+ pr_info("Host support PMU\n");
+
+ /* Dump PMU capabilities */
+ dump_pmu_caps();
+
+ vm = vm_create(VM_MODE_P47V47_16K);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ pmu_irq_count = 0;
+ vm_init_descriptor_tables(vm);
+ loongarch_vcpu_setup(vcpu);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+ sync_global_to_guest(vm, pmu_irq_count);
+
+ attr.group = KVM_LOONGARCH_VM_FEAT_CTRL,
+ attr.attr = KVM_LOONGARCH_VM_FEAT_PMU,
+
+ ret = ioctl(vm->fd, KVM_HAS_DEVICE_ATTR, &attr);
+
+ if (ret == 0) {
+ pr_info("PMU is enabled in VM\n");
+ } else {
+ print_skip("PMU not enabled by VM config\n");
+ return KSFT_SKIP;
+ }
+
+ while (1) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ printf("%s", (const char *)uc.buffer);
+ break;
+ case UCALL_DONE:
+ printf("PMU test PASSED\n");
+ goto done;
+ case UCALL_ABORT:
+ printf("PMU test FAILED\n");
+ ret = -1;
+ goto done;
+ default:
+ printf("Unexpected exit\n");
+ ret = -1;
+ goto done;
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return ret;
+}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index c81a84990eab..3cdfa3b19b85 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -22,6 +22,7 @@
#include "processor.h"
#include "test_util.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define DUMMY_MEMSLOT_INDEX 7
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index e3711beff7f3..5087d082c4b0 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -25,6 +25,7 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <ucall_common.h>
#define MEM_EXTRA_SIZE SZ_64K
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 6a437d2be9fa..51c070556f3e 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void)
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
errno, strerror(errno));
- nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ nr_vcpus = CPU_COUNT(&possible_mask);
TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
}
int main(int argc, char *argv[])
@@ -339,8 +341,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
- mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
@@ -361,11 +362,9 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
- for (i = 0; i < slot_size; i += SZ_1G)
- __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm->page_size)
- virt_pg_map(vm, gpa + i, gpa + i);
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
#endif
}
@@ -413,7 +412,7 @@ int main(int argc, char *argv[])
for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
- munmap(mem, slot_size / 2);
+ kvm_munmap(mem, slot_size / 2);
/* Sanity check that the vCPUs actually ran. */
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..f3de0386ba7b 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,19 +10,20 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <pthread.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
#define TEST_SLOT 10
-static void guest_code(uint64_t base_gpa)
+static void guest_code(uint64_t base_gva)
{
volatile uint64_t val __used;
int i;
for (i = 0; i < TEST_NPAGES; i++) {
- uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+ uint64_t *src = (uint64_t *)(base_gva + i * PAGE_SIZE);
val = *src;
}
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
GUEST_DONE();
}
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
- u64 left)
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ u64 gpa;
+ uint32_t flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
{
struct kvm_pre_fault_memory range = {
- .gpa = gpa,
+ .gpa = base_gpa + offset,
.size = size,
.flags = 0,
};
- u64 prev;
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
+
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
- do {
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
prev = range.size;
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
save_errno = errno;
@@ -49,22 +98,70 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
"%sexpecting range.size to change on %s",
ret < 0 ? "not " : "",
ret < 0 ? "failure" : "success");
- } while (ret >= 0 ? range.size : save_errno == EINTR);
- TEST_ASSERT(range.size == left,
- "Completed with %lld bytes left, expected %" PRId64,
- range.size, left);
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
- if (left == 0)
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
else
- /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
- __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
- "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
{
+ uint64_t gpa, gva, alignment, guest_page_size;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
.type = vm_type,
@@ -74,34 +171,26 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
struct kvm_vm *vm;
struct ucall uc;
- uint64_t guest_test_phys_mem;
- uint64_t guest_test_virt_mem;
- uint64_t alignment, guest_page_size;
-
vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
- guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
-#ifdef __s390x__
- alignment = max(0x100000UL, guest_page_size);
-#else
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
alignment = SZ_2M;
-#endif
- guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
- guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
- private ? KVM_MEM_GUEST_MEMFD : 0);
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
if (private)
- vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
- pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
+
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
- vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 9e370800a6a2..f962fefc48fa 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -15,7 +15,7 @@
static int timer_irq = IRQ_S_TIMER;
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
uint64_t xcnt, xcnt_diff_us, cmp;
unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index cfed6c727bfc..739d17befb5a 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -27,7 +27,7 @@ static void guest_code(void)
GUEST_DONE();
}
-static void guest_breakpoint_handler(struct ex_regs *regs)
+static void guest_breakpoint_handler(struct pt_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8515921dfdbf..8d6b951434eb 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -17,6 +17,15 @@ enum {
VCPU_FEATURE_SBI_EXT,
};
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
@@ -53,8 +62,11 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALASR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
@@ -67,11 +79,14 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCLSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
@@ -81,6 +96,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZILSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
@@ -92,6 +108,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
@@ -117,6 +135,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
return true;
@@ -141,6 +161,38 @@ bool check_reject_set(int err)
return err == EINVAL;
}
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ uint64_t feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -170,6 +222,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
if (!s->feature)
continue;
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
switch (s->feature_type) {
case VCPU_FEATURE_ISA_EXT:
feature = RISCV_ISA_EXT_REG(s->feature);
@@ -184,6 +243,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Try to enable the desired extension */
__vcpu_set_reg(vcpu, feature, 1);
+skip:
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
"%s not available, skipping tests", s->name);
@@ -204,6 +264,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)";
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
case KVM_REG_RISCV_CONFIG_REG(marchid):
@@ -408,6 +470,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
@@ -434,8 +525,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
+ KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
@@ -448,11 +542,14 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFBFMIN),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOP),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
@@ -462,6 +559,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
@@ -473,6 +571,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZTSO),
KVM_ISA_EXT_ARR(ZVBB),
KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFBFMIN),
+ KVM_ISA_EXT_ARR(ZVFBFWMA),
KVM_ISA_EXT_ARR(ZVFH),
KVM_ISA_EXT_ARR(ZVFHMIN),
KVM_ISA_EXT_ARR(ZVKB),
@@ -545,6 +645,8 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
};
@@ -601,6 +703,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off)
return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off);
}
+static const char *sbi_fwft_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)";
+ case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)";
+ case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)";
+ case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)";
+ case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)";
+ case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off);
+}
+
static const char *sbi_id_to_str(const char *prefix, __u64 id)
{
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE);
@@ -613,6 +728,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id)
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_STA:
return sbi_sta_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_FWFT:
+ return sbi_fwft_id_to_str(reg_off);
}
return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
@@ -635,6 +752,9 @@ void print_reg(const char *prefix, __u64 id)
case KVM_REG_SIZE_U128:
reg_size = "KVM_REG_SIZE_U128";
break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
default:
printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
(id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
@@ -666,6 +786,10 @@ void print_reg(const char *prefix, __u64 id)
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
reg_size, fp_d_id_to_str(prefix, id));
break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
reg_size, isa_ext_id_to_str(prefix, id));
@@ -691,10 +815,13 @@ void print_reg(const char *prefix, __u64 id)
*/
static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
@@ -770,11 +897,26 @@ static __u64 sbi_sta_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
};
+static __u64 sbi_fwft_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
+};
+
static __u64 zicbom_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM,
};
+static __u64 zicbop_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP,
+};
+
static __u64 zicboz_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ,
@@ -870,6 +1012,48 @@ static __u64 fp_d_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
};
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
@@ -879,8 +1063,13 @@ static __u64 fp_d_regs[] = {
#define SUBLIST_SBI_STA \
{"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
.regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
+#define SUBLIST_SBI_FWFT \
+ {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \
+ .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),}
#define SUBLIST_ZICBOM \
{"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define SUBLIST_ZICBOP \
+ {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),}
#define SUBLIST_ZICBOZ \
{"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
#define SUBLIST_AIA \
@@ -894,6 +1083,9 @@ static __u64 fp_d_regs[] = {
{"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
.regs_n = ARRAY_SIZE(fp_d_regs),}
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
static __u64 regs_##ext[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
@@ -958,10 +1150,13 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
+KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
@@ -974,8 +1169,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalasr, ZALASR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
@@ -988,11 +1186,14 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
+KVM_ISA_EXT_SIMPLE_CONFIG(zclsd, ZCLSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP);
KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
@@ -1002,6 +1203,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zilsd, ZILSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
@@ -1013,6 +1215,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
@@ -1030,10 +1234,13 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_pmu,
&config_sbi_dbcn,
&config_sbi_susp,
+ &config_sbi_mpxy,
+ &config_sbi_fwft,
&config_aia,
&config_fp_f,
&config_fp_d,
&config_h,
+ &config_v,
&config_smnpm,
&config_smstateen,
&config_sscofpmf,
@@ -1045,8 +1252,11 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svnapot,
&config_svpbmt,
&config_svvptc,
+ &config_zaamo,
&config_zabha,
&config_zacas,
+ &config_zalrsc,
+ &config_zalasr,
&config_zawrs,
&config_zba,
&config_zbb,
@@ -1059,11 +1269,14 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zcb,
&config_zcd,
&config_zcf,
+ &config_zclsd,
&config_zcmop,
&config_zfa,
+ &config_zfbfmin,
&config_zfh,
&config_zfhmin,
&config_zicbom,
+ &config_zicbop,
&config_zicboz,
&config_ziccrse,
&config_zicntr,
@@ -1073,6 +1286,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zihintntl,
&config_zihintpause,
&config_zihpm,
+ &config_zilsd,
&config_zimop,
&config_zknd,
&config_zkne,
@@ -1084,6 +1298,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_ztso,
&config_zvbb,
&config_zvbc,
+ &config_zvfbfmin,
+ &config_zvfbfwma,
&config_zvfh,
&config_zvfhmin,
&config_zvkb,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 03406de4989d..cec1621ace23 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -73,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num)
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
- switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
@@ -128,17 +127,36 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
"Unable to stop counter %ld error %ld\n", counter, ret.error);
}
-static void guest_illegal_exception_handler(struct ex_regs *regs)
+static void guest_illegal_exception_handler(struct pt_regs *regs)
{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
@@ -418,6 +436,7 @@ static void test_pmu_basic_sanity(void)
struct sbiret ret;
int num_counters = 0, i;
union sbi_pmu_ctr_info ctrinfo;
+ unsigned long fw_eidx;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
@@ -443,7 +462,24 @@ static void test_pmu_basic_sanity(void)
pmu_csr_read_num(ctrinfo.csr);
GUEST_ASSERT(illegal_handler_invoked);
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
- read_fw_counter(i, ctrinfo);
+ /* Read without configure should fail */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+ i, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM);
+
+ /*
+ * Try to configure with a common firmware event.
+ * If configuration succeeds, verify we can read it.
+ */
+ fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) |
+ SBI_PMU_FW_ACCESS_LOAD;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ i, 1, 0, fw_eidx, 0, 0);
+ if (ret.error == 0) {
+ GUEST_ASSERT(ret.value == i);
+ read_fw_counter(i, ctrinfo);
+ }
}
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..f80ad6b47d16 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,26 +196,29 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
break;
+ case 'l':
+ latency = atoi_paranoid(optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -243,6 +246,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +295,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +313,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index 85cc8c18d6e7..e39a724fe860 100644
--- a/tools/testing/selftests/kvm/s390/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm)
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, 0);
}
static struct kvm_vm *create_vm_two_memslots(void)
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
index 27255880dabd..aded795d42be 100644
--- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
@@ -291,7 +291,7 @@ int main(int argc, char *argv[])
ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
free(array);
} else {
- ksft_test_result_skip("%s feature is not avaialable\n",
+ ksft_test_result_skip("%s feature is not available\n",
testlist[idx].subfunc_name);
}
}
diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c
new file mode 100644
index 000000000000..7819a0af19a8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/irq_routing.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IRQ routing offset tests.
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+extern char guest_code[];
+asm("guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test(void)
+{
+ struct kvm_irq_routing *routing;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_paddr_t mem;
+ int ret;
+
+ struct kvm_irq_routing_entry ue = {
+ .type = KVM_IRQ_ROUTING_S390_ADAPTER,
+ .gsi = 1,
+ };
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0);
+
+ routing = kvm_gsi_routing_create();
+ routing->nr = 1;
+ routing->entries[0] = ue;
+ routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem;
+ routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem;
+
+ routing->entries[0].u.adapter.summary_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n");
+
+ routing->entries[0].u.adapter.summary_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "summary offset inside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "ind offset inside of page\n");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING));
+
+ ksft_print_header();
+ ksft_set_plan(4);
+ test();
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/keyop.c b/tools/testing/selftests/kvm/s390/keyop.c
new file mode 100644
index 000000000000..c7805e87d12c
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/keyop.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_S390_KEYOP
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "processor.h"
+
+#define BUF_PAGES 128UL
+#define GUEST_PAGES 256UL
+
+#define BUF_START_GFN (GUEST_PAGES - BUF_PAGES)
+#define BUF_START_ADDR (BUF_START_GFN << PAGE_SHIFT)
+
+#define KEY_BITS_ACC 0xf0
+#define KEY_BIT_F 0x08
+#define KEY_BIT_R 0x04
+#define KEY_BIT_C 0x02
+
+#define KEY_BITS_RC (KEY_BIT_R | KEY_BIT_C)
+#define KEY_BITS_ALL (KEY_BITS_ACC | KEY_BIT_F | KEY_BITS_RC)
+
+static unsigned char tmp[BUF_PAGES];
+static unsigned char old[BUF_PAGES];
+static unsigned char expected[BUF_PAGES];
+
+static int _get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+
+ return __vm_ioctl(vcpu->vm, KVM_S390_GET_SKEYS, &skeys_ioctl);
+}
+
+static void get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ int r = _get_skeys(vcpu, skeys);
+
+ TEST_ASSERT(!r, "Failed to get storage keys, r=%d", r);
+}
+
+static void set_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_SET_SKEYS, &skeys_ioctl);
+ TEST_ASSERT(!r, "Failed to set storage keys, r=%d", r);
+}
+
+static int do_keyop(struct kvm_vcpu *vcpu, int op, unsigned long page_idx, unsigned char skey)
+{
+ struct kvm_s390_keyop keyop = {
+ .guest_addr = BUF_START_ADDR + page_idx * PAGE_SIZE,
+ .key = skey,
+ .operation = op,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_KEYOP, &keyop);
+ TEST_ASSERT(!r, "Failed to perform keyop, r=%d", r);
+ TEST_ASSERT((keyop.key & 1) == 0,
+ "Last bit of key is 1, should be 0! page %lu, new key=%#x, old key=%#x",
+ page_idx, skey, keyop.key);
+
+ return keyop.key;
+}
+
+static void fault_in_buffer(struct kvm_vcpu *vcpu, int where, int cur_loc)
+{
+ unsigned long i;
+ int r;
+
+ if (where != cur_loc)
+ return;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ r = ioctl(vcpu->fd, KVM_S390_VCPU_FAULT, BUF_START_ADDR + i * PAGE_SIZE);
+ TEST_ASSERT(!r, "Faulting in buffer page %lu, r=%d", i, r);
+ }
+}
+
+static inline void set_pattern(unsigned char skeys[])
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++)
+ skeys[i] = i << 1;
+}
+
+static void dump_sk(const unsigned char skeys[], const char *descr)
+{
+ int i, j;
+
+ fprintf(stderr, "# %s:\n", descr);
+ for (i = 0; i < BUF_PAGES; i += 32) {
+ fprintf(stderr, "# %3d: ", i);
+ for (j = 0; j < 32; j++)
+ fprintf(stderr, "%02x ", skeys[i + j]);
+ fprintf(stderr, "\n");
+ }
+}
+
+static inline void compare(const unsigned char what[], const unsigned char expected[],
+ const char *descr, int fault_in_loc)
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ if (expected[i] != what[i]) {
+ dump_sk(expected, "Expected");
+ dump_sk(what, "Got");
+ }
+ TEST_ASSERT(expected[i] == what[i],
+ "%s! fault-in location %d, page %d, expected %#x, got %#x",
+ descr, fault_in_loc, i, expected[i], what[i]);
+ }
+}
+
+static inline void clear_all(void)
+{
+ memset(tmp, 0, BUF_PAGES);
+ memset(old, 0, BUF_PAGES);
+ memset(expected, 0, BUF_PAGES);
+}
+
+static void test_init(struct kvm_vcpu *vcpu, int fault_in)
+{
+ /* Set all storage keys to zero */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 2);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting keys not zero", fault_in);
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 3);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting storage keys failed", fault_in);
+}
+
+static void test_rrbe(struct kvm_vcpu *vcpu, int fault_in)
+{
+ unsigned char k;
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the RRBE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ k = do_keyop(vcpu, KVM_S390_KEYOP_RRBE, i, 0xff);
+ TEST_ASSERT((expected[i] & KEY_BITS_RC) == k,
+ "Old R or C value mismatch! expected: %#x, got %#x",
+ expected[i] & KEY_BITS_RC, k);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+
+ for (i = 0; i < BUF_PAGES; i++)
+ expected[i] &= ~KEY_BIT_R;
+
+ /* Verify that only the R bit has been cleared */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static void test_iske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the ISKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ tmp[i] = do_keyop(vcpu, KVM_S390_KEYOP_ISKE, i, 0xff);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(tmp, expected, "Old value mismatch", fault_in);
+
+ /* Check storage keys have not changed */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Storage keys values changed", fault_in);
+}
+
+static void test_sske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(tmp);
+ set_skeys(vcpu, tmp);
+
+ /* Call the SSKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ expected[i] = ~tmp[i] & KEY_BITS_ALL;
+ /* Set the new storage keys to be the bit-inversion of the previous ones */
+ old[i] = do_keyop(vcpu, KVM_S390_KEYOP_SSKE, i, expected[i] | 1);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(old, tmp, "Old value mismatch", fault_in);
+
+ /* Verify that the storage keys have been set correctly */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu, int fault_in_location);
+ int n_fault_in_locations;
+} testplan[] = {
+ { "Initialization", test_init, 5 },
+ { "RRBE", test_rrbe, 5 },
+ { "ISKE", test_iske, 5 },
+ { "SSKE", test_sske, 5 },
+};
+
+static void run_test(void (*the_test)(struct kvm_vcpu *, int), int fault_in_location)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int r;
+
+ vm = vm_create_barebones();
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, GUEST_PAGES, 0);
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ r = _get_skeys(vcpu, tmp);
+ TEST_ASSERT(r == KVM_S390_GET_SKEYS_NONE,
+ "Storage keys are not disabled initially, r=%d", r);
+
+ clear_all();
+
+ the_test(vcpu, fault_in_location);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int i, f;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_KEYOP));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+
+ ksft_print_header();
+ for (i = 0, f = 0; i < ARRAY_SIZE(testplan); i++)
+ f += testplan[i].n_fault_in_locations;
+ ksft_set_plan(f);
+
+ for (i = 0; i < ARRAY_SIZE(testplan); i++) {
+ for (f = 0; f < testplan[i].n_fault_in_locations; f++) {
+ run_test(testplan[i].test, f);
+ ksft_test_result_pass("%s (fault-in location %d)\n", testplan[i].name, f);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..50bc1c38225a 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
/**
* For virtual cpus that have been created with S390 user controlled
* virtual machines, the resulting vcpu fd can be memory mapped at page
* offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
* the virtual cpu's hardware control block.
*/
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
TH_LOG("VM created %p %p", self->run, self->sie_block);
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
FIXTURE_TEARDOWN(uc_kvm)
{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
close(self->vcpu_fd);
close(self->vm_fd);
close(self->kvm_fd);
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Since user_operexec is the superset it can be used for the
+ * 0 instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enablement after VCPU has been added is automatically tested since
+ * we enable the capability after VCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bc440d5aba57..a398dc3a8c4b 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __riscv || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
supported_flags |= KVM_MEM_READONLY;
#endif
@@ -413,14 +413,7 @@ static void test_add_max_memory_regions(void)
uint32_t max_mem_slots;
uint32_t slot;
void *mem, *mem_aligned, *mem_extra;
- size_t alignment;
-
-#ifdef __s390x__
- /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
- alignment = 0x100000;
-#else
- alignment = 1;
-#endif
+ size_t alignment = 1;
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
TEST_ASSERT(max_mem_slots > 0,
@@ -433,10 +426,10 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
for (slot = 0; slot < max_mem_slots; slot++)
@@ -446,9 +439,8 @@ static void test_add_max_memory_regions(void)
mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
/* Check it cannot be added memory slots beyond the limit */
- mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
(uint64_t)max_mem_slots * MEM_REGION_SIZE,
@@ -456,8 +448,8 @@ static void test_add_max_memory_regions(void)
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
- munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index cce2520af720..efe56a10d13e 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -69,16 +69,10 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
- int ret;
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
- (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
-
vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
@@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
st->pad[8], st->pad[9], st->pad[10]);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__aarch64__)
/* PV_TIME_ST must have 64-byte alignment */
@@ -118,7 +127,7 @@ static int64_t smccc(uint32_t func, uint64_t arg)
{
struct arm_smccc_res res;
- smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -170,7 +179,6 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
struct kvm_vm *vm = vcpu->vm;
uint64_t st_ipa;
- int ret;
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
@@ -178,21 +186,12 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
.addr = (uint64_t)&st_ipa,
};
- vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
-
st_ipa = (ulong)st_gva[i];
vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
@@ -205,6 +204,36 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
ksft_print_msg(" st_time: %ld\n", st->st_time);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t st_ipa;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&st_ipa,
+ };
+
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+
+ st_ipa = (ulong)ST_GPA_BASE | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+ st_ipa = (ulong)ST_GPA_BASE;
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__riscv)
/* SBI STA shmem must have 64-byte alignment */
@@ -301,6 +330,137 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
pr_info("\n");
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_one_reg reg;
+ uint64_t shmem;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ reg.id = KVM_REG_RISCV |
+ KVM_REG_SIZE_ULONG |
+ KVM_REG_RISCV_SBI_STATE |
+ KVM_REG_RISCV_SBI_STA |
+ KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
+ reg.addr = (uint64_t)&shmem;
+
+ shmem = ST_GPA_BASE + 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "misaligned STA shmem returns -EINVAL");
+
+ shmem = ST_GPA_BASE;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == 0,
+ "aligned STA shmem succeeds");
+
+ shmem = INVALID_GPA;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == 0,
+ "all-ones for STA shmem succeeds");
+
+ kvm_vm_free(vm);
+}
+
+#elif defined(__loongarch__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63)
+#define KVM_STEAL_PHYS_VALID BIT_ULL(0)
+
+struct kvm_steal_time {
+ __u64 steal;
+ __u32 version;
+ __u32 flags;
+ __u8 preempted;
+ __u8 pad[47];
+};
+
+static void check_status(struct kvm_steal_time *st)
+{
+ GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
+}
+
+static void guest_code(int cpu)
+{
+ uint32_t version;
+ struct kvm_steal_time *st = st_gva[cpu];
+
+ memset(st, 0, sizeof(*st));
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ version = READ_ONCE(st->version);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(version < READ_ONCE(st->version));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
+{
+ int err;
+ uint64_t val;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_CPUCFG,
+ .attr = CPUCFG_KVM_FEATURE,
+ .addr = (uint64_t)&val,
+ };
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ err = __vcpu_ioctl(vcpu, KVM_GET_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ return val & BIT(KVM_FEATURE_STEAL_TIME);
+}
+
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ int err;
+ uint64_t st_gpa;
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
+ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA,
+ .addr = (uint64_t)&st_gpa,
+ };
+
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "No PV stealtime Feature");
+
+ st_gpa = (unsigned long)st_gva[i] | KVM_STEAL_PHYS_VALID;
+ err = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "Fail to set PV stealtime GPA");
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+{
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+}
#endif
static void *do_steal_time(void *arg)
@@ -369,6 +529,8 @@ int main(int ac, char **av)
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
ksft_set_plan(NR_VCPUS);
+ check_steal_time_uapi();
+
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
steal_time_init(vcpus[i], i);
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..37b166260ee3 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -69,6 +69,12 @@ static inline void __tileloadd(void *tile)
: : "a"(tile), "d"(0));
}
+static inline int tileloadd_safe(void *tile)
+{
+ return kvm_asm_safe(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10",
+ "a"(tile), "d"(0));
+}
+
static inline void __tilerelease(void)
{
asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
@@ -124,27 +130,52 @@ static void set_tilecfg(struct tile_config *cfg)
}
}
+enum {
+ /* Retrieve TMM0 from guest, stash it for TEST_RESTORE_TILEDATA */
+ TEST_SAVE_TILEDATA = 1,
+
+ /* Check TMM0 against tiledata */
+ TEST_COMPARE_TILEDATA = 2,
+
+ /* Restore TMM0 from earlier save */
+ TEST_RESTORE_TILEDATA = 4,
+
+ /* Full VM save/restore */
+ TEST_SAVE_RESTORE = 8,
+};
+
static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
struct tile_data *tiledata,
struct xstate *xstate)
{
+ int vector;
+
GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
this_cpu_has(X86_FEATURE_OSXSAVE));
check_xtile_info();
- GUEST_SYNC(1);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(2);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- GUEST_SYNC(3);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* Check save/restore when trap to userspace */
__tileloadd(tiledata);
- GUEST_SYNC(4);
+ GUEST_SYNC(TEST_SAVE_TILEDATA | TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /* host tries setting tiledata while guest XFD is set */
+ GUEST_SYNC(TEST_RESTORE_TILEDATA);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ wrmsr(MSR_IA32_XFD, 0);
__tilerelease();
- GUEST_SYNC(5);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/*
* After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
* the xcomp_bv.
@@ -154,6 +185,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+ /* #NM test */
+
/* xfd=0x40000, disable amx tiledata */
wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
@@ -166,32 +199,33 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
- GUEST_SYNC(6);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- /* Trigger #NM exception */
- __tileloadd(tiledata);
- GUEST_SYNC(10);
- GUEST_DONE();
-}
+ /* Trigger #NM exception */
+ vector = tileloadd_safe(tiledata);
+ __GUEST_ASSERT(vector == NM_VECTOR,
+ "Wanted #NM on tileloadd with XFD[18]=1, got %s",
+ ex_str(vector));
-void guest_nm_handler(struct ex_regs *regs)
-{
- /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
- GUEST_SYNC(7);
GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- GUEST_SYNC(8);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
/* Clear xfd_err */
wrmsr(MSR_IA32_XFD_ERR, 0);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(9);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ __tileloadd(tiledata);
+ GUEST_SYNC(TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ GUEST_DONE();
}
int main(int argc, char *argv[])
@@ -200,10 +234,10 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_x86_state *state;
+ struct kvm_x86_state *tile_state = NULL;
int xsave_restore_size;
vm_vaddr_t amx_cfg, tiledata, xstate;
struct ucall uc;
- u32 amx_offset;
int ret;
/*
@@ -228,9 +262,6 @@ int main(int argc, char *argv[])
vcpu_regs_get(vcpu, &regs1);
- /* Register #NM handler */
- vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
/* amx cfg for guest_code */
amx_cfg = vm_vaddr_alloc_page(vm);
memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
@@ -244,6 +275,7 @@ int main(int argc, char *argv[])
memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+ int iter = 0;
for (;;) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -253,37 +285,47 @@ int main(int argc, char *argv[])
REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
- switch (uc.args[1]) {
- case 1:
- case 2:
- case 3:
- case 5:
- case 6:
- case 7:
- case 8:
- fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
- break;
- case 4:
- case 10:
- fprintf(stderr,
- "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+ ++iter;
+ if (uc.args[1] & TEST_SAVE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, save tiledata\n", iter);
+ tile_state = vcpu_save_state(vcpu);
+ }
+ if (uc.args[1] & TEST_COMPARE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, check TMM0 contents\n", iter);
/* Compacted mode, get amx offset by xsave area
* size subtract 8K amx size.
*/
- amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
- state = vcpu_save_state(vcpu);
- void *amx_start = (void *)state->xsave + amx_offset;
+ u32 amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ void *amx_start = (void *)tile_state->xsave + amx_offset;
void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
/* Only check TMM0 register, 1 tile */
ret = memcmp(amx_start, tiles_data, TILE_SIZE);
TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ }
+ if (uc.args[1] & TEST_RESTORE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, before KVM_SET_XSAVE\n", iter);
+ vcpu_xsave_set(vcpu, tile_state->xsave);
+ fprintf(stderr, "GUEST_SYNC #%d, after KVM_SET_XSAVE\n", iter);
+ }
+ if (uc.args[1] & TEST_SAVE_RESTORE) {
+ fprintf(stderr, "GUEST_SYNC #%d, save/restore VM state\n", iter);
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
kvm_x86_state_cleanup(state);
- break;
- case 9:
- fprintf(stderr,
- "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
- break;
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
}
break;
case UCALL_DONE:
@@ -293,22 +335,6 @@ int main(int argc, char *argv[])
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
- state = vcpu_save_state(vcpu);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vcpu, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
}
done:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..8b15a13df939
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static uint64_t read_dev_msr(int msr_fd, uint32_t msr)
+{
+ uint64_t data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_read_aperf_mperf();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+ * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel.
+ */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+ guest_read_aperf_mperf();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(nested_test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(nested_test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+ uint64_t msr_val;
+ uint8_t vector;
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ uint64_t host_aperf_before, host_mperf_before;
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd, cpu, i;
+
+ /* Sanity check that APERF/MPERF are unsupported by default. */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ kvm_vm_free(vm);
+
+ cpu = pin_self_to_any_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (!has_nested)
+ nested_test_data_gva = NONCANONICAL;
+ else if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+ uint64_t host_aperf_after, host_mperf_after;
+ uint64_t guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..f9ed14996977 100644
--- a/tools/testing/selftests/kvm/x86/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *ent;
+ struct kvm_sregs sregs;
int rc;
u32 eax, ebx, x;
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+ /*
+ * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+ * setting unmodified CPUID succeeds with runtime CPUID updates.
+ */
+ vcpu_sregs_get(vcpu, &sregs);
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ sregs.cr4 ^= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ sregs.cr4 ^= X86_CR4_PKE;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
/* Changing CPU features is forbidden */
ent = vcpu_get_cpuid_entry(vcpu, 0x7);
ebx = ent->ebx;
diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
new file mode 100644
index 000000000000..af7c90103396
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Red Hat, Inc.
+ *
+ * Test that vmx_leave_smm() validates vmcs12 controls before re-entering
+ * nested guest mode on RSM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "smm.h"
+#include "hyperv.h"
+#include "vmx.h"
+
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define SYNC_PORT 0xe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+/*
+ * SMI handler: runs in real-address mode.
+ * Reports SMRAM_STAGE via port IO, then does RSM.
+ */
+static uint8_t smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+static inline void sync_with_host(uint64_t phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n"
+ : "+a" (phase));
+}
+
+static void l2_guest_code(void)
+{
+ sync_with_host(1);
+
+ /* After SMI+RSM with invalid controls, we should not reach here. */
+ vmcall();
+}
+
+static void guest_code(struct vmx_pages *vmx_pages,
+ struct hyperv_test_pages *hv_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Set up Hyper-V enlightenments and eVMCS */
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 exits via vmcall if test fails */
+ sync_with_host(2);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ struct hyperv_test_pages *hv;
+ struct hv_enlightened_vmcs *evmcs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_regs regs;
+ int stage_reported;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
+
+ vcpu_run(vcpu);
+
+ /* L2 is running and syncs with host. */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == 1,
+ "Expected stage 1, got %d", stage_reported);
+
+ /* Inject SMI while L2 is running. */
+ inject_smi(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == SMRAM_STAGE,
+ "Expected SMM handler stage %#x, got %#x",
+ SMRAM_STAGE, stage_reported);
+
+ /*
+ * Guest is now paused in the SMI handler, about to execute RSM.
+ * Hack the eVMCS page to set-up invalid pin-based execution
+ * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING).
+ */
+ evmcs = hv->enlightened_vmcs_hva;
+ evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS;
+ evmcs->hv_clean_fields = 0;
+
+ /*
+ * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE,
+ * copying the invalid pin_based_vm_exec_control into cached_vmcs12.
+ */
+ union {
+ struct kvm_nested_state state;
+ char state_[16384];
+ } nested_state_buf;
+
+ memset(&nested_state_buf, 0, sizeof(nested_state_buf));
+ nested_state_buf.state.size = sizeof(nested_state_buf);
+ vcpu_nested_state_get(vcpu, &nested_state_buf.state);
+
+ /*
+ * Resume the guest. The SMI handler executes RSM, which calls
+ * vmx_leave_smm(). nested_vmx_check_controls() should detect
+ * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault.
+ */
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..8926cfe0e209
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define fastop(__insn) \
+ "bt $0, %[bt_val]\n\t" \
+ __insn "\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t"
+
+#define flags_constraint(flags_val) [flags]"=r"(flags_val)
+#define bt_constraint(__bt_val) [bt_val]"rm"((uint32_t)__bt_val)
+
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[val]") \
+ : [val]"+r"(__val), flags_constraint(__flags) \
+ : bt_constraint(__val) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : [input]"r"(__input), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : "c"(__shift), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \
+ : "+a"(__a), "+d"(__d), flags_constraint(__flags), \
+ KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : [denom]"rm"(__rm), bt_constraint(__rm) \
+ : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define guest_test_fastop_div(insn, type_t, __val1, __val2) \
+({ \
+ type_t _a = __val1, _d = __val1, rm = __val2; \
+ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \
+ uint64_t flags, ex_flags; \
+ uint8_t v, ex_v; \
+ \
+ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \
+ v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \
+ \
+ GUEST_ASSERT_EQ(v, ex_v); \
+ __GUEST_ASSERT(v == ex_v, \
+ "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \
+ ex_v, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, v); \
+ __GUEST_ASSERT(a == ex_a && d == ex_d, \
+ "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\
+ (uint64_t)ex_a, (uint64_t)ex_d, insn, (uint64_t)_a, \
+ (uint64_t)_d, (uint64_t)rm, (uint64_t)a, (uint64_t)d); \
+ __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \
+ "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)_a, (uint64_t)_d, (uint64_t)rm, flags);\
+})
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+if (sizeof(type_t) != 1) { \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+} \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint8_t, "b");
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
index 762628f7d4ba..00b6e85735dd 100644
--- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
@@ -52,7 +52,7 @@ static void guest_main(void)
if (host_cpu_is_intel) {
native_hypercall_insn = vmx_vmcall;
other_hypercall_insn = svm_vmmcall;
- } else if (host_cpu_is_amd) {
+ } else if (host_cpu_is_amd_compatible) {
native_hypercall_insn = svm_vmmcall;
other_hypercall_insn = vmx_vmcall;
} else {
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..3c21af811d8f 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
{
}
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -64,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
- "function %x is our of supported range",
+ "function %x is out of supported range",
entry->function);
TEST_ASSERT(entry->index == 0,
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
case 0x40000004:
test_val = entry->eax & (1UL << 18);
- TEST_ASSERT(!!test_val == !smt_possible(),
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..130b9ce7e5dd 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr)
if (msr->fault_expected)
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected #GP on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
else
__GUEST_ASSERT(!vector,
- "Expected success on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected success on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
if (vector || is_write_only_msr(msr->idx))
goto done;
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
- output = pgs_gpa + 4096;
+ output = pgs_gpa + PAGE_SIZE;
} else {
input = output = 0;
}
@@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
vector = __hyperv_hypercall(hcall->control, input, output, &res);
if (hcall->ud_expected) {
__GUEST_ASSERT(vector == UD_VECTOR,
- "Expected #UD for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected #UD for control '%lu', got %s",
+ hcall->control, ex_str(vector));
} else {
__GUEST_ASSERT(!vector,
- "Expected no exception for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected no exception for control '%lu', got %s",
+ hcall->control, ex_str(vector));
GUEST_ASSERT_EQ(res, hcall->expect);
}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 2b5b4bc6ef7e..ca61836c4e32 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
ipi->vector = IPI_VECTOR;
ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 0;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..c542cc4762b1 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -619,9 +619,9 @@ int main(int argc, char *argv[])
*/
gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
for (i = 0; i < NTEST_PAGES; i++) {
- pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
}
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..d88500c118eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..e45c028d2a7e 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -27,19 +30,27 @@ do { \
\
if (fault_wanted) \
__GUEST_ASSERT((vector) == UD_VECTOR, \
- "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected #UD on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
else \
__GUEST_ASSERT(!(vector), \
- "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected success on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,65 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..f7e39bf887ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+ const struct kvm_x86_cpu_feature feature;
+ const struct kvm_x86_cpu_feature feature2;
+ const char *name;
+ const u64 reset_val;
+ const u64 write_val;
+ const u64 rsvd_val;
+ const u32 index;
+ const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \
+{ \
+ .index = msr, \
+ .name = str, \
+ .write_val = val, \
+ .rsvd_val = rsvd, \
+ .reset_val = reset, \
+ .feature = X86_FEATURE_ ##feat, \
+ .feature2 = X86_FEATURE_ ##f2, \
+ .is_kvm_defined = is_kvm, \
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \
+ ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2) \
+ ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set. This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat) \
+ __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat) \
+ ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features. For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+ /*
+ * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM
+ * is supposed to emulate that behavior based on guest vendor model
+ * (which is the same as the host vendor model for this test).
+ */
+ if (!host_cpu_is_amd_compatible)
+ return want;
+
+ switch (msr) {
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_TSC_AUX:
+ return want & GENMASK_ULL(31, 0);
+ default:
+ return want;
+ }
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+ u64 val;
+ u8 vec;
+
+ vec = rdmsr_safe(msr, &val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+ __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+ want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+ u8 vec;
+
+ vec = wrmsr_safe(msr, val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+ ex_str(vec), msr, val);
+ __rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+ __rdmsr(msr->index, msr->reset_val);
+ __wrmsr(msr->index, msr->write_val);
+ GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+ __rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+ u64 val;
+ u8 vec;
+
+ /*
+ * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+ * repair, just skip the unsupported MSR tests.
+ */
+ if (ignore_unsupported_msrs)
+ goto skip_wrmsr_gp;
+
+ /*
+ * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+ * writable only if their associated feature is supported. Skip the
+ * RDMSR #GP test if the secondary feature is supported, but perform
+ * the WRMSR #GP test as the to-be-written value is tied to the primary
+ * feature. For all other MSRs, simply do nothing.
+ */
+ if (this_cpu_has(msr->feature2)) {
+ if (msr->index != MSR_IA32_U_CET &&
+ msr->index != MSR_IA32_S_CET)
+ goto skip_wrmsr_gp;
+
+ goto skip_rdmsr_gp;
+ }
+
+ vec = rdmsr_safe(msr->index, &val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+ msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+ vec = wrmsr_safe(msr->index, msr->write_val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+ GUEST_SYNC(0);
+}
+
+void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+ /* Skip reserved value checks as well, ignore_msrs is trully a mess. */
+ if (ignore_unsupported_msrs)
+ return;
+
+ /*
+ * If the CPU will truncate the written value (e.g. SYSENTER on AMD),
+ * expect success and a truncated value, not #GP.
+ */
+ if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) ||
+ msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+ u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+ __GUEST_ASSERT(vec == GP_VECTOR,
+ "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->rsvd_val, ex_str(vec));
+ } else {
+ __wrmsr(msr->index, msr->rsvd_val);
+ __wrmsr(msr->index, msr->reset_val);
+ }
+}
+
+static void guest_main(void)
+{
+ for (;;) {
+ const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+ if (this_cpu_has(msr->feature))
+ guest_test_supported_msr(msr);
+ else
+ guest_test_unsupported_msr(msr);
+
+ if (msr->rsvd_val)
+ guest_test_reserved_val(msr);
+
+ GUEST_SYNC(msr->reset_val);
+ }
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS 1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct {
+ struct kvm_reg_list list;
+ u64 regs[KVM_X86_MAX_NR_REGS];
+ } regs = {};
+ int r, i;
+
+ /*
+ * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+ * regs, then the vCPU obviously doesn't support the reg.
+ */
+ r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ if (!r)
+ return false;
+
+ TEST_ASSERT_EQ(errno, E2BIG);
+
+ /*
+ * KVM x86 is expected to support enumerating a relative small number
+ * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG
+ * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For
+ * simplicity, hardcode the maximum number of regs and manually update
+ * the test as necessary.
+ */
+ TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+ "KVM reports %llu regs, test expects at most %u regs, stale test?",
+ regs.list.n, KVM_X86_MAX_NR_REGS);
+
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ for (i = 0; i < regs.list.n; i++) {
+ if (regs.regs[i] == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+ bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+ u64 reset_val = msrs[idx].reset_val;
+ u64 write_val = msrs[idx].write_val;
+ u64 rsvd_val = msrs[idx].rsvd_val;
+ u32 reg = msrs[idx].index;
+ u64 val;
+ int r;
+
+ if (!use_one_reg)
+ return;
+
+ TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+ if (!has_reg) {
+ r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Expected failure on get_reg(0x%x)", reg);
+ rsvd_val = 0;
+ goto out;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, reg, val);
+
+ vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ write_val, reg, val);
+
+out:
+ r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+ TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+ u64 reset_val = msrs[idx].reset_val;
+ u32 msr = msrs[idx].index;
+ u64 val;
+
+ if (!kvm_cpu_has(msrs[idx].feature))
+ return;
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ guest_val, msr, val);
+
+ if (use_one_reg)
+ vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+ else
+ vcpu_set_msr(vcpu, msr, reset_val);
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ reset_val, msr, val);
+
+ if (!has_one_reg)
+ return;
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ host_test_msr(vcpu, uc.args[1]);
+ return;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_DONE:
+ TEST_FAIL("Unexpected UCALL_DONE");
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+ int i;
+
+ for (i = 0; i < NR_VCPUS; i++)
+ do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+ const struct kvm_msr __msrs[] = {
+ MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+ MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+ MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+ MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+ /*
+ * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add
+ * entries for each features so that TSC_AUX doesn't exists for
+ * the "unsupported" vCPU, and obviously to test both cases.
+ */
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+ MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+ /*
+ * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+ * but KVM doesn't emulate that behavior on emulated writes,
+ * i.e. this test will observe different behavior if the MSR
+ * writes are handed by hardware vs. KVM. KVM's behavior is
+ * intended (though far from ideal), so don't bother testing
+ * non-canonical values.
+ */
+ MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+ MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+ MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+ MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+ MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+ MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+ MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+ };
+
+ const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+ const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+ /*
+ * Create three vCPUs, but run them on the same task, to validate KVM's
+ * context switching of MSR state. Don't pin the task to a pCPU to
+ * also validate KVM's handling of cross-pCPU migration. Use the full
+ * set of features for the first two vCPUs, but clear all features in
+ * third vCPU in order to test both positive and negative paths.
+ */
+ const int NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct kvm_vm *vm;
+ int i;
+
+ kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+ kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+ memcpy(msrs, __msrs, sizeof(__msrs));
+
+ ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+ sync_global_to_guest(vm, msrs);
+ sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+ /*
+ * Clear features in the "unsupported features" vCPU. This needs to be
+ * done before the first vCPU run as KVM's ABI is that guest CPUID is
+ * immutable once the vCPU has been run.
+ */
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ /*
+ * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+ * honor LM=0 for MSRs that are supposed to exist if and only
+ * if the vCPU is a 64-bit model. Ditto for NONE; clearing a
+ * fake feature flag will result in false failures.
+ */
+ if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+ memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+ vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ struct kvm_msr *msr = &msrs[idx];
+
+ if (msr->is_kvm_defined) {
+ for (i = 0; i < NR_VCPUS; i++)
+ host_test_kvm_reg(vcpus[i]);
+ continue;
+ }
+
+ /*
+ * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+ * are consistent with respect to MSRs whose existence is
+ * enumerated via CPUID. Skip the check for FS/GS.base MSRs,
+ * as they aren't reported in the save/restore list since their
+ * state is managed via SREGS.
+ */
+ TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+ kvm_msr_is_in_save_restore_list(msr->index) ==
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+ "%s %s in save/restore list, but %s according to CPUID", msr->name,
+ kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+ "supported" : "unsupported");
+
+ sync_global_to_guest(vm, idx);
+
+ vcpus_run(vcpus, NR_VCPUS);
+ vcpus_run(vcpus, NR_VCPUS);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+ test_msrs();
+
+ if (has_one_reg) {
+ use_one_reg = true;
+ test_msrs();
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..f001cb836bfa 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_close_while_nested
- *
* Copyright (C) 2019, Red Hat, Inc.
*
* Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
PORT_L0_EXIT = 0x2000,
};
+#define L2_GUEST_STACK_SIZE 64
+
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
: : [port] "d" (PORT_L0_EXIT) : "rax");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(0);
}
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
for (;;) {
volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
new file mode 100644
index 000000000000..619229bbd693
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+
+/*
+ * Allocate four pages total. Two pages are used to verify that the KVM marks
+ * the accessed page/GFN as marked dirty, but not the "other" page. Times two
+ * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
+ * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
+ * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
+ * tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
+ */
+#define TEST_MEM_PAGES 4
+
+#define TEST_MEM_BASE 0xc0000000
+#define TEST_MEM_ALIAS_BASE 0xc0002000
+
+#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE)
+
+#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+
+#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)
+
+#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx))
+
+#define L2_GUEST_STACK_SIZE 64
+
+/* Use the page offset bits to communicate the access+fault type. */
+#define TEST_SYNC_READ_FAULT BIT(0)
+#define TEST_SYNC_WRITE_FAULT BIT(1)
+#define TEST_SYNC_NO_FAULT BIT(2)
+
+static void l2_guest_code(vm_vaddr_t base)
+{
+ vm_vaddr_t page0 = TEST_GUEST_ADDR(base, 0);
+ vm_vaddr_t page1 = TEST_GUEST_ADDR(base, 1);
+
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT);
+ WRITE_ONCE(*(u64 *)page0, 1);
+ GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT);
+
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page1);
+ GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT);
+
+ /* Exit to L1 and never come back. */
+ vmcall();
+}
+
+static void l2_guest_code_tdp_enabled(void)
+{
+ /*
+ * Use the aliased virtual addresses when running with TDP to verify
+ * that KVM correctly handles the case where a page is dirtied via a
+ * different GPA than would be used by L1.
+ */
+ l2_guest_code(TEST_MEM_ALIAS_BASE);
+}
+
+static void l2_guest_code_tdp_disabled(void)
+{
+ /*
+ * Use the "normal" virtual addresses when running without TDP enabled,
+ * in which case L2 will use the same page tables as L1, and thus needs
+ * to use the same virtual addresses that are mapped into L1.
+ */
+ l2_guest_code(TEST_MEM_BASE);
+}
+
+void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ if (svm->ncr3_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
+ unsigned long *bmap)
+{
+ vm_vaddr_t gva = arg & ~(PAGE_SIZE - 1);
+ int page_nr, i;
+
+ /*
+ * Extract the page number of underlying physical page, which is also
+ * the _L1_ page number. The dirty bitmap _must_ be updated based on
+ * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA
+ * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty
+ * bitmap and which underlying physical page is accessed.
+ *
+ * Note, gva will be '0' if there was no access, i.e. if the purpose of
+ * the sync is to verify all pages are clean.
+ */
+ if (!gva)
+ page_nr = 0;
+ else if (gva >= TEST_MEM_ALIAS_BASE)
+ page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT;
+ else
+ page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT;
+ TEST_ASSERT(page_nr == 0 || page_nr == 1,
+ "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
+ TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
+ "Test bug, gva must be valid if a fault is expected");
+
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+ /*
+ * Check all pages to verify the correct physical page was modified (or
+ * not), and that all pages are clean/dirty as expected.
+ *
+ * If a fault of any kind is expected, the target page should be dirty
+ * as the Dirty bit is set in the gPTE. KVM should create a writable
+ * SPTE even on a read fault, *and* KVM must mark the GFN as dirty
+ * when doing so.
+ */
+ for (i = 0; i < TEST_MEM_PAGES; i++) {
+ if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT))
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
+ "Page %u incorrectly not written by guest", i);
+ else
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
+ "Page %u incorrectly written by guest", i);
+
+ if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT))
+ TEST_ASSERT(test_bit(i, bmap),
+ "Page %u incorrectly reported clean on %s fault",
+ i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
+ else
+ TEST_ASSERT(!test_bit(i, bmap),
+ "Page %u incorrectly reported dirty", i);
+ }
+}
+
+static void test_dirty_log(bool nested_tdp)
+{
+ vm_vaddr_t nested_gva = 0;
+ unsigned long *bmap;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool done = false;
+
+ pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled");
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (nested_tdp)
+ vm_enable_tdp(vm);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
+
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ /*
+ * Add an identity map for GVA range [0xc0000000, 0xc0004000). This
+ * affects both L1 and L2. However...
+ */
+ virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);
+
+ /*
+ * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
+ * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
+ *
+ * When TDP is disabled, the L2 guest code will still access the same L1
+ * GPAs as the TDP enabled case.
+ *
+ * Set the Dirty bit in the PTEs used by L2 so that KVM will create
+ * writable SPTEs when handling read faults (if the Dirty bit isn't
+ * set, KVM must intercept the next write to emulate the Dirty bit
+ * update).
+ */
+ if (nested_tdp) {
+ tdp_identity_map_default_memslots(vm);
+ tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
+ tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);
+
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ } else {
+ *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
+ *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
+ }
+
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
+
+ while (!done) {
+ memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ test_handle_ucall_sync(vm, uc.args[1], bmap);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+
+ test_dirty_log(/*nested_tdp=*/false);
+
+ if (kvm_cpu_has_tdp())
+ test_dirty_log(/*nested_tdp=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..a6b6da9cf7fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
index 67a62a5a8895..0f2102b43629 100644
--- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_set_nested_state_test
- *
* Copyright (C) 2019, Google LLC.
*
* This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
@@ -11,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <errno.h>
#include <linux/kvm.h>
@@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
- TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
- TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
+ TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
+ TEST_ASSERT_EQ(state->flags, 0);
+
+ free(state);
+}
+
+static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
+}
+
+static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
+{
+ uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
+}
+
+void set_default_svm_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ state->format = 1;
+ state->size = size;
+ state->hdr.svm.vmcb_pa = 0x3000;
+}
+
+void test_svm_nested_state(struct kvm_vcpu *vcpu)
+{
+ /* Add a page for VMCB. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
+
+ /* The format must be set to 1. 0 for VMX, 1 for SVM. */
+ set_default_svm_state(state, state_sz);
+ state->format = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
+ * cleared.
+ */
+ vcpu_efer_disable_svm(vcpu);
+
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+
+ /* Enable SVM in the guest EFER. */
+ vcpu_efer_enable_svm(vcpu);
+
+ /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and VMCB
+ * only when entering guest mode.
+ */
+ set_default_svm_state(state, state_sz/2);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get it
+ * again, except for vmcb_pa, which is always returned as 0 when not in
+ * guest mode.
+ */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %ld and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+
+ TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
+ TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
free(state);
}
@@ -255,20 +354,20 @@ int main(int argc, char *argv[])
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
- /*
- * AMD currently does not implement set_nested_state, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
- * First run tests with VMX disabled to check error handling.
+ * First run tests with VMX/SVM disabled to check error handling.
+ * test_{vmx/svm}_nested_state() will re-enable as needed.
*/
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ else
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
test_nested_state_expect_efault(vcpu, NULL);
@@ -297,7 +396,10 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vcpu);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ test_vmx_nested_state(vcpu);
+ else
+ test_svm_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..2839f650e5c9 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -35,6 +34,8 @@
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -72,42 +73,47 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
@@ -119,16 +125,19 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t nested_gva;
struct kvm_vcpu *vcpu;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..4260c9e4f489 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,6 +13,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#include "kselftest.h"
/* L2 is scaled up (from L1's perspective) by this factor */
@@ -79,7 +80,30 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint32_t control;
@@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
+ vm_vaddr_t guest_gva = 0;
uint64_t tsc_start, tsc_end;
uint64_t tsc_khz;
@@ -129,7 +161,8 @@ int main(int argc, char *argv[])
uint64_t l1_tsc_freq = 0;
uint64_t l2_tsc_freq = 0;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
new file mode 100644
index 000000000000..71717118d692
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+/*
+ * Allocate two VMCB pages for testing. Both pages have different GVAs (shared
+ * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that:
+ * - L2 GPA == L1 GPA for VMCB0.
+ * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1.
+ *
+ * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is
+ * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, depends
+ * on which VMCB is accessed.
+ */
+#define TEST_MEM_SLOT_INDEX 1
+#define TEST_MEM_PAGES 2
+#define TEST_MEM_BASE 0xc0000000
+
+#define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE)
+
+#define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx)
+#define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx)
+
+#define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0)
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code_vmsave(void)
+{
+ asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmload(void)
+{
+ asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmcb(int vmcb_idx)
+{
+ wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa);
+ l2_guest_code_vmsave();
+
+ /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */
+ GUEST_SYNC(vmcb_idx);
+
+ l2_guest_code_vmload();
+ GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb);
+
+ /* Reset MSR_KERNEL_GS_BASE */
+ wrmsr(MSR_KERNEL_GS_BASE, 0);
+ l2_guest_code_vmsave();
+
+ vmmcall();
+}
+
+static void l2_guest_code_vmcb0(void)
+{
+ l2_guest_code_vmcb(0);
+}
+
+static void l2_guest_code_vmcb1(void)
+{
+ l2_guest_code_vmcb(1);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Each test case initializes the guest RIP below */
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */
+ svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /* ..SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE cleared.. */
+ svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* ..and SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE set */
+ svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */
+ svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /*
+ * Without SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be
+ * interpreted as an L1 GPA, so VMCB0 should be used.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0;
+ svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ /*
+ * With SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be interpeted as
+ * an L2 GPA, and translated through the NPT to VMCB1.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1;
+ svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0;
+ struct vmcb *test_vmcb[2];
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_tdp(vm);
+
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE, TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES, 0);
+
+ for (i = 0; i <= 1; i++) {
+ virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1);
+ test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i));
+ }
+
+ tdp_identity_map_default_memslots(vm);
+
+ /*
+ * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing
+ * whether the L2 GPA is interpreted as an L1 GPA or translated through
+ * the NPT.
+ */
+ TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0));
+ tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ i = uc.args[1];
+ TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i);
+
+ /*
+ * Check that only the expected VMCB has KERNEL_GS_BASE
+ * set to 0xaaaa, and update it to 0xbbbb.
+ */
+ TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa);
+ TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0);
+ test_vmcb[i]->save.kernel_gs_base = 0xbbbb;
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 8aaaf25b6111..3eaa216b96c0 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 4
+#define NUM_INSNS_PER_LOOP 6
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -75,6 +75,11 @@ static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
};
kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
@@ -158,10 +163,18 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr
switch (idx) {
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
break;
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
break;
case INTEL_ARCH_LLC_REFERENCES_INDEX:
case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -171,9 +184,12 @@ static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr
fallthrough;
case INTEL_ARCH_CPU_CYCLES_INDEX:
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
+ case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+ case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
"Expected top-down slots >= %u, got count = %lu",
NUM_INSNS_RETIRED, count);
@@ -210,9 +226,11 @@ do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
+ FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
"mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
@@ -311,7 +329,7 @@ static void guest_test_arch_events(void)
}
static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t length, uint8_t unavailable_mask)
+ uint8_t length, uint32_t unavailable_mask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -320,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
if (!pmu_version)
return;
+ unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
+ X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
+
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
pmu_version, perf_capabilities);
@@ -344,8 +365,8 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
- "Expected %s on " #insn "(0x%x), got vector %u", \
- expect_gp ? "#GP" : "no fault", msr, vector) \
+ "Expected %s on " #insn "(0x%x), got %s", \
+ expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
__GUEST_ASSERT(val == expected, \
@@ -575,6 +596,26 @@ static void test_intel_counters(void)
};
/*
+ * To keep the total runtime reasonable, test only a handful of select,
+ * semi-arbitrary values for the mask of unavailable PMU events. Test
+ * 0 (all events available) and all ones (no events available) as well
+ * as alternating bit sequencues, e.g. to detect if KVM is checking the
+ * wrong bit(s).
+ */
+ const uint32_t unavailable_masks[] = {
+ 0x0,
+ 0xffffffffu,
+ 0xaaaaaaaau,
+ 0x55555555u,
+ 0xf0f0f0f0u,
+ 0x0f0f0f0fu,
+ 0xa0a0a0a0u,
+ 0x0a0a0a0au,
+ 0x50505050u,
+ 0x05050505u,
+ };
+
+ /*
* Test up to PMU v5, which is the current maximum version defined by
* Intel, i.e. is the last version that is guaranteed to be backwards
* compatible with KVM's existing behavior.
@@ -611,16 +652,7 @@ static void test_intel_counters(void)
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
- /*
- * To keep the total runtime reasonable, test every
- * possible non-zero, non-reserved bitmap combination
- * only with the native PMU version and the full bit
- * vector length.
- */
- if (v == pmu_version) {
- for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
- test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
- }
+
/*
* Test single bits for all PMU version and lengths up
* the number of events +1 (to verify KVM doesn't do
@@ -629,11 +661,8 @@ static void test_intel_counters(void)
* ones i.e. all events being available and unavailable.
*/
for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
- test_arch_events(v, perf_caps[i], j, 0);
- test_arch_events(v, perf_caps[i], j, 0xff);
-
- for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
- test_arch_events(v, perf_caps[i], j, BIT(k));
+ for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
+ test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
}
pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..93b61c077991 100644
--- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -214,8 +214,10 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
do { \
uint64_t br = pmc_results.branches_retired; \
uint64_t ir = pmc_results.instructions_retired; \
+ bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
+ br >= NUM_BRANCHES : br == NUM_BRANCHES; \
\
- if (br && br != NUM_BRANCHES) \
+ if (br && !br_matched) \
pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
__func__, br, NUM_BRANCHES); \
TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
@@ -359,7 +361,8 @@ static bool use_intel_pmu(void)
*/
static bool use_amd_pmu(void)
{
- return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+ return (host_cpu_is_amd && kvm_cpu_family() >= 0x17) ||
+ host_cpu_is_hygon;
}
/*
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1969f4ab9b28 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
pthread_t threads[KVM_MAX_VCPUS];
struct kvm_vm *vm;
- int memfd, i, r;
+ int memfd, i;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
* should prevent the VM from being fully destroyed until the last
* reference to the guest_memfd is also put.
*/
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
close(memfd);
}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..b238615196ad 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,6 +28,7 @@
int kvm_fd;
u64 supported_vmsa_features;
bool have_sev_es;
+bool have_snp;
static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
{
@@ -83,6 +84,9 @@ void test_vm_types(void)
if (have_sev_es)
test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
test_init2_invalid(0, &(struct kvm_sev_init){},
"VM type is KVM_X86_DEFAULT_VM");
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
"sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+ "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
test_vm_types();
test_flags(KVM_X86_SEV_VM);
if (have_sev_es)
test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
test_features(KVM_X86_SEV_VM, 0);
if (have_sev_es)
test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 0a6dfba3905b..6b0928e69051 100644
--- a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
@@ -36,8 +36,6 @@ static struct kvm_vm *sev_vm_create(bool es)
sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
- if (es)
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
return vm;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index d97816dc476a..8bd37a476f15 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -13,21 +13,61 @@
#include "linux/psp-sev.h"
#include "sev.h"
+static void guest_sev_test_msr(uint32_t msr)
+{
+ uint64_t val = rdmsr(msr);
+
+ wrmsr(msr, val);
+ GUEST_ASSERT(val == rdmsr(msr));
+}
+
+#define guest_sev_test_reg(reg) \
+do { \
+ uint64_t val = get_##reg(); \
+ \
+ set_##reg(val); \
+ GUEST_ASSERT(val == get_##reg()); \
+} while (0)
+
+static void guest_sev_test_regs(void)
+{
+ guest_sev_test_msr(MSR_EFER);
+ guest_sev_test_reg(cr0);
+ guest_sev_test_reg(cr3);
+ guest_sev_test_reg(cr4);
+ guest_sev_test_reg(cr8);
+}
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+static void guest_snp_code(void)
+{
+ uint64_t sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ guest_sev_test_regs();
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+ guest_sev_test_regs();
+
/*
* TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
* force "termination" to signal "done" via the GHCB MSR protocol.
*/
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
+ vmgexit();
}
static void guest_sev_code(void)
@@ -35,6 +75,8 @@ static void guest_sev_code(void)
GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ guest_sev_test_regs();
+
GUEST_DONE();
}
@@ -62,7 +104,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
abort();
}
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -72,7 +114,7 @@ static void test_sync_vmsa(uint32_t policy)
double x87val = M_PI;
struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
MEM_REGION_TEST_DATA);
hva = addr_gva2hva(vm, gva);
@@ -89,10 +131,10 @@ static void test_sync_vmsa(uint32_t policy)
: "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
vcpu_xsave_set(vcpu, &xsave);
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+ vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
+ memset(hva, 0, PAGE_SIZE);
vcpu_run(vcpu);
@@ -108,14 +150,12 @@ static void test_sync_vmsa(uint32_t policy)
kvm_vm_free(vm);
}
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
/* TODO: Validate the measurement is as expected. */
@@ -124,7 +164,7 @@ static void test_sev(void *guest_code, uint64_t policy)
for (;;) {
vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) {
+ if (is_sev_es_vm(vm)) {
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Wanted SYSTEM_EVENT, got %s",
exit_reason_str(vcpu->run->exit_reason));
@@ -161,16 +201,14 @@ static void guest_shutdown_code(void)
__asm__ __volatile__("ud2");
}
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint32_t type = KVM_X86_SEV_ES_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+ vm_sev_launch(vm, policy, NULL);
vcpu_run(vcpu);
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -180,27 +218,42 @@ static void test_sev_es_shutdown(void)
kvm_vm_free(vm);
}
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy)
{
const u64 xf_mask = XFEATURE_MASK_X87_AVX;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ if (type == KVM_X86_SEV_VM)
+ return;
- test_sev_es_shutdown();
+ test_sev_shutdown(type, policy);
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
}
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..0e8aec568010 100644
--- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
@@ -47,7 +47,6 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint64_t *pte;
uint64_t *hva;
uint64_t gpa;
int rc;
@@ -73,8 +72,7 @@ int main(int argc, char *argv[])
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, PAGE_SIZE);
- pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
- *pte |= BIT_ULL(MAXPHYADDR);
+ *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..ade8412bf94a 100644
--- a/tools/testing/selftests/kvm/x86/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -14,13 +14,11 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "smm.h"
#include "vmx.h"
#include "svm_util.h"
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
#define SMRAM_GPA 0x1000000
#define SMRAM_STAGE 0xfe
@@ -113,18 +111,6 @@ static void guest_code(void *arg)
sync_with_host(DONE);
}
-void inject_smi(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- events.smi.pending = 1;
- events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
- vcpu_events_set(vcpu, &events);
-}
-
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
@@ -140,16 +126,7 @@ int main(int argc, char *argv[])
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
- SMRAM_MEMSLOT, SMRAM_PAGES, 0);
- TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
- == SMRAM_GPA, "could not allocate guest physical addresses?");
-
- memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
- memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
- sizeof(smi_handler));
-
- vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
if (kvm_cpu_has(X86_FEATURE_SVM))
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..992a52504a4a 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -26,7 +26,9 @@ void svm_l2_guest_code(void)
GUEST_SYNC(4);
/* Exit to L1 */
vmcall();
+ clgi();
GUEST_SYNC(6);
+ stgi();
/* Done, exit to L1 and never come back. */
vmcall();
}
@@ -41,6 +43,8 @@ static void svm_l1_guest_code(struct svm_test_data *svm)
generic_svm_setup(svm, svm_l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK);
+
GUEST_SYNC(3);
run_guest(vmcb, svm->vmcb_gpa);
GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
@@ -141,7 +145,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
if (this_cpu_has(X86_FEATURE_XSAVE)) {
uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
+ uint8_t buffer[PAGE_SIZE];
memset(buffer, 0xcc, sizeof(buffer));
@@ -222,6 +226,35 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
GUEST_DONE();
}
+void svm_check_nested_state(int stage, struct kvm_x86_state *state)
+{
+ struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm;
+
+ if (kvm_cpu_has(X86_FEATURE_VGIF)) {
+ if (stage == 4)
+ TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1);
+ if (stage == 6)
+ TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0);
+ }
+
+ if (kvm_cpu_has(X86_FEATURE_NRIPS)) {
+ /*
+ * GUEST_SYNC() causes IO emulation in KVM, in which case the
+ * RIP is advanced before exiting to userspace. Hence, the RIP
+ * in the saved state should be the same as nRIP saved by the
+ * CPU in the VMCB.
+ */
+ if (stage == 6)
+ TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip);
+ }
+}
+
+void check_nested_state(int stage, struct kvm_x86_state *state)
+{
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM))
+ svm_check_nested_state(stage, state);
+}
+
int main(int argc, char *argv[])
{
uint64_t *xstate_bv, saved_xstate_bv;
@@ -278,6 +311,8 @@ int main(int argc, char *argv[])
kvm_vm_release(vm);
+ check_nested_state(stage, state);
+
/* Restore state in a new VM. */
vcpu = vm_recreate_with_one_vcpu(vm);
vcpu_load_state(vcpu, state);
diff --git a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
new file mode 100644
index 000000000000..ff99438824d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define DO_BRANCH() do { asm volatile("jmp 1f\n 1: nop"); } while (0)
+
+struct lbr_branch {
+ u64 from, to;
+};
+
+volatile struct lbr_branch l2_branch;
+
+#define RECORD_AND_CHECK_BRANCH(b) \
+do { \
+ wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR); \
+ DO_BRANCH(); \
+ (b)->from = rdmsr(MSR_IA32_LASTBRANCHFROMIP); \
+ (b)->to = rdmsr(MSR_IA32_LASTBRANCHTOIP); \
+ /* Disable LBR right after to avoid overriding the IPs */ \
+ wrmsr(MSR_IA32_DEBUGCTLMSR, 0); \
+ \
+ GUEST_ASSERT_NE((b)->from, 0); \
+ GUEST_ASSERT_NE((b)->to, 0); \
+} while (0)
+
+#define CHECK_BRANCH_MSRS(b) \
+do { \
+ GUEST_ASSERT_EQ((b)->from, rdmsr(MSR_IA32_LASTBRANCHFROMIP)); \
+ GUEST_ASSERT_EQ((b)->to, rdmsr(MSR_IA32_LASTBRANCHTOIP)); \
+} while (0)
+
+#define CHECK_BRANCH_VMCB(b, vmcb) \
+do { \
+ GUEST_ASSERT_EQ((b)->from, vmcb->save.br_from); \
+ GUEST_ASSERT_EQ((b)->to, vmcb->save.br_to); \
+} while (0)
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* Record a branch, trigger save/restore, and make sure LBRs are intact */
+ RECORD_AND_CHECK_BRANCH(&l2_branch);
+ GUEST_SYNC(true);
+ CHECK_BRANCH_MSRS(&l2_branch);
+ vmmcall();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, bool nested_lbrv)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct lbr_branch l1_branch;
+
+ /* Record a branch, trigger save/restore, and make sure LBRs are intact */
+ RECORD_AND_CHECK_BRANCH(&l1_branch);
+ GUEST_SYNC(true);
+ CHECK_BRANCH_MSRS(&l1_branch);
+
+ /* Run L2, which will also do the same */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ if (nested_lbrv)
+ vmcb->control.misc_ctl2 = SVM_MISC2_ENABLE_V_LBR;
+ else
+ vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_LBR;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* Trigger save/restore one more time before checking, just for kicks */
+ GUEST_SYNC(true);
+
+ /*
+ * If LBR_CTL_ENABLE is set, L1 and L2 should have separate LBR MSRs, so
+ * expect L1's LBRs to remain intact and L2 LBRs to be in the VMCB.
+ * Otherwise, the MSRs are shared between L1 & L2 so expect L2's LBRs.
+ */
+ if (nested_lbrv) {
+ CHECK_BRANCH_MSRS(&l1_branch);
+ CHECK_BRANCH_VMCB(&l2_branch, vmcb);
+ } else {
+ CHECK_BRANCH_MSRS(&l2_branch);
+ }
+ GUEST_DONE();
+}
+
+void test_lbrv_nested_state(bool nested_lbrv)
+{
+ struct kvm_x86_state *state = NULL;
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ pr_info("Testing with nested LBRV %s\n", nested_lbrv ? "enabled" : "disabled");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 2, svm_gva, nested_lbrv);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ /* Save the vCPU state and restore it in a new VM on sync */
+ pr_info("Guest triggered save/restore.\n");
+ state = vcpu_save_state(vcpu);
+ kvm_vm_release(vm);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_is_lbrv_enabled());
+
+ test_lbrv_nested_state(/*nested_lbrv=*/false);
+ test_lbrv_nested_state(/*nested_lbrv=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
new file mode 100644
index 000000000000..a521a9eed061
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ unsigned long efer = rdmsr(MSR_EFER);
+
+ /* generic_svm_setup() initializes EFER_SVME set for L2 */
+ GUEST_ASSERT(efer & EFER_SVME);
+ wrmsr(MSR_EFER, efer & ~EFER_SVME);
+
+ /* Unreachable, L1 should be shutdown */
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+
+ /* Unreachable, L1 should be shutdown */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t nested_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..4bd1655f9e6d 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
- "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
@@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
- "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
new file mode 100644
index 000000000000..569869bed20b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+#include "kvm_test_harness.h"
+#include "test_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define SYNC_GP 101
+#define SYNC_L2_STARTED 102
+
+static unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void l2_code(void)
+{
+ GUEST_SYNC(SYNC_L2_STARTED);
+ vmcall();
+}
+
+static void l1_vmrun(struct svm_test_data *svm, u64 gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmrun %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmload(struct svm_test_data *svm, u64 gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmload %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmsave(struct svm_test_data *svm, u64 gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmsave %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmexit(struct svm_test_data *svm, u64 gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+static u64 unmappable_gpa(struct kvm_vcpu *vcpu)
+{
+ struct userspace_mem_region *region;
+ u64 region_gpa_end, vm_gpa_end = 0;
+ int i;
+
+ hash_for_each(vcpu->vm->regions.slot_hash, i, region, slot_node) {
+ region_gpa_end = region->region.guest_phys_addr + region->region.memory_size;
+ vm_gpa_end = max(vm_gpa_end, region_gpa_end);
+ }
+
+ return vm_gpa_end;
+}
+
+static void test_invalid_vmcb12(struct kvm_vcpu *vcpu)
+{
+ vm_vaddr_t nested_gva = 0;
+ struct ucall uc;
+
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, -1ULL);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_GP);
+}
+
+static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu)
+{
+ vm_vaddr_t nested_gva = 0;
+
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT_EQ(vcpu->run->emulation_failure.suberror, KVM_INTERNAL_ERROR_EMULATION);
+}
+
+static void test_unmappable_vmcb12_vmexit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_x86_state *state;
+ vm_vaddr_t nested_gva = 0;
+ struct ucall uc;
+
+ /*
+ * Enter L2 (with a legit vmcb12 GPA), then overwrite vmcb12 GPA with an
+ * unmappable GPA. KVM will fail to map vmcb12 on nested VM-Exit and
+ * cause a shutdown.
+ */
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_L2_STARTED);
+
+ state = vcpu_save_state(vcpu);
+ state->nested.hdr.svm.vmcb_pa = unmappable_gpa(vcpu);
+ vcpu_load_state(vcpu, state);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmcb12_gpa);
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_invalid, l1_vmrun)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_invalid, l1_vmload)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_invalid, l1_vmsave)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_unmappable, l1_vmrun)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_unmappable, l1_vmload)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_unmappable, l1_vmsave)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+/*
+ * Invalid vmcb12_gpa cannot be test for #VMEXIT as KVM_SET_NESTED_STATE will
+ * reject it.
+ */
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmexit_unmappable, l1_vmexit)
+{
+ test_unmappable_vmcb12_vmexit(vcpu);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..be7d72f3c029 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
vcpu_regs_set(vcpu, &regs);
}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..8463a9956410 100644
--- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
data = test_rdmsr(MSR_GS_BASE);
GUEST_ASSERT(data == MSR_GS_BASE);
+ /* Access the MSRs again to ensure KVM has disabled interception.*/
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
GUEST_DONE();
}
@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
"Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
run_guest_then_process_ucall_done(vcpu);
}
diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
new file mode 100644
index 000000000000..337c53fddeff
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define GOOD_IPI_VECTOR 0xe0
+#define BAD_IPI_VECTOR 0xf0
+
+static volatile int good_ipis_received;
+
+static void good_ipi_handler(struct ex_regs *regs)
+{
+ good_ipis_received++;
+}
+
+static void bad_ipi_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
+}
+
+static void l2_guest_code(void)
+{
+ x2apic_enable();
+ vmcall();
+
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ /* Modify APIC ID to coerce KVM into inhibiting APICv. */
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+
+ /*
+ * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
+ * but not SVI. APICv should be inhibited due to running with a
+ * modified APIC ID.
+ */
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);
+
+ /* Enable IRQs and verify the IRQ was received. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 1);
+
+ /*
+ * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
+ * as KVM should force the APIC ID back to its default.
+ */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+ GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);
+
+ /*
+ * Scribble the APIC access page to verify KVM disabled xAPIC
+ * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
+ * when L2 switches back to accelerated xAPIC mode.
+ */
+ xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);
+
+ /*
+ * Verify the IRQ is still in-service and emit an EOI to verify KVM
+ * propagates the highest vISR vector to SVI when APICv is activated
+ * (and does so even if APICv was uninhibited while L2 was active).
+ */
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ x2apic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+
+ /*
+ * Run L2 one more time to switch back to xAPIC mode to verify that KVM
+ * handles the x2APIC => xAPIC transition and inhibits APICv while L2
+ * is active.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));
+
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 2);
+
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ xapic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct vmx_pages *vmx;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
+ vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ }
+
+ /*
+ * Verify at least two IRQs were injected. Unfortunately, KVM counts
+ * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
+ * so being more precise isn't possible given the current stats.
+ */
+ TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
+ "Wanted at least 2 IRQ injections, got %lu\n",
+ vcpu_get_stat(vcpu, irq_injections));
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
deleted file mode 100644
index fa512d033205..000000000000
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_PAGES 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1 0xc0001000
-#define NESTED_TEST_MEM2 0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
- READ_ONCE(*a);
- WRITE_ONCE(*a, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- /* Exit to L1 and never come back. */
- vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
- l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
- /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
- l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- void *l2_rip;
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
-
- if (vmx->eptp_gpa)
- l2_rip = l2_guest_code_ept_enabled;
- else
- l2_rip = l2_guest_code_ept_disabled;
-
- prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(false);
- GUEST_ASSERT(!vmlaunch());
- GUEST_SYNC(false);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
- vm_vaddr_t vmx_pages_gva = 0;
- struct vmx_pages *vmx;
- unsigned long *bmap;
- uint64_t *host_test_mem;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- bool done = false;
-
- pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- /* Add an extra memory slot for testing dirty logging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- GUEST_TEST_MEM,
- TEST_MEM_SLOT_INDEX,
- TEST_MEM_PAGES,
- KVM_MEM_LOG_DIRTY_PAGES);
-
- /*
- * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
- * affects both L1 and L2. However...
- */
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
- /*
- * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
- * 0xc0000000.
- *
- * Note that prepare_eptp should be called only L1's GPA map is done,
- * meaning after the last call to virt_map.
- *
- * When EPT is disabled, the L2 guest code will still access the same L1
- * GPAs as the EPT enabled case.
- */
- if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- }
-
- bmap = bitmap_zalloc(TEST_MEM_PAGES);
- host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
- while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- /*
- * The nested guest wrote at offset 0x1000 in the memslot, but the
- * dirty bitmap must be filled in according to L1 GPA, not L2.
- */
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
- } else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
- }
-
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- test_vmx_dirty_log(/*enable_ept=*/false);
-
- if (kvm_cpu_has_ept())
- test_vmx_dirty_log(/*enable_ept=*/true);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..2cae86d9d5e2 100644
--- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -110,7 +110,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+ TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..915c42001dba
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ vm_paddr_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..7ff6f62e20a3 100644
--- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
@@ -29,7 +29,7 @@ static union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
- u64 anythread_deprecated:1;
+ u64 pebs_timing_info:1;
};
u64 capabilities;
} host_cap;
@@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = {
.pebs_arch_reg = 1,
.pebs_format = -1,
.pebs_baseline = 1,
+ .pebs_timing_info = 1,
};
static const union perf_capabilities format_caps = {
@@ -56,8 +57,8 @@ static void guest_test_perf_capabilities_gp(uint64_t val)
uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for value '0x%lx', got vector '0x%x'",
- val, vector);
+ "Expected #GP for value '0x%lx', got %s",
+ val, ex_str(vector));
}
static void guest_code(uint64_t current_val)
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index 35cb9de54a82..ae4a4b6c05ca 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -256,7 +256,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
uint64_t hlt_count;
@@ -267,9 +267,8 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index fdebff1165c7..0c5e12f5f14e 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x)
__test_icr(x, icr | i);
/*
- * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
- * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use
+ * vector 0xff.
*/
icr = APIC_INT_ASSERT | 0xff;
for (i = 0; i < 0xff; i++) {
@@ -248,7 +248,7 @@ int main(int argc, char *argv[])
* drops writes, AMD does not). Account for the errata when checking
* that KVM reads back what was written.
*/
- x.has_xavic_errata = host_cpu_is_amd &&
+ x.has_xavic_errata = host_cpu_is_amd_compatible &&
get_kvm_amd_param_bool("avic");
vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
new file mode 100644
index 000000000000..3862134d9d40
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+static bool is_x2apic;
+
+#define IRQ_VECTOR 0x20
+
+/* See also the comment at similar assertion in memslot_perf_test.c */
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static atomic_uint tpr_guest_irq_sync_val;
+
+static void tpr_guest_irq_sync_flag_reset(void)
+{
+ atomic_store_explicit(&tpr_guest_irq_sync_val, 0,
+ memory_order_release);
+}
+
+static unsigned int tpr_guest_irq_sync_val_get(void)
+{
+ return atomic_load_explicit(&tpr_guest_irq_sync_val,
+ memory_order_acquire);
+}
+
+static void tpr_guest_irq_sync_val_inc(void)
+{
+ atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1,
+ memory_order_acq_rel);
+}
+
+static void tpr_guest_irq_handler_xapic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ x2apic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_queue(void)
+{
+ if (is_x2apic) {
+ x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR);
+ } else {
+ uint32_t icr, icr2;
+
+ icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED |
+ IRQ_VECTOR;
+ icr2 = 0;
+
+ xapic_write_reg(APIC_ICR2, icr2);
+ xapic_write_reg(APIC_ICR, icr);
+ }
+}
+
+static uint8_t tpr_guest_tpr_get(void)
+{
+ uint32_t taskpri;
+
+ if (is_x2apic)
+ taskpri = x2apic_read_reg(APIC_TASKPRI);
+ else
+ taskpri = xapic_read_reg(APIC_TASKPRI);
+
+ return GET_APIC_PRI(taskpri);
+}
+
+static uint8_t tpr_guest_ppr_get(void)
+{
+ uint32_t procpri;
+
+ if (is_x2apic)
+ procpri = x2apic_read_reg(APIC_PROCPRI);
+ else
+ procpri = xapic_read_reg(APIC_PROCPRI);
+
+ return GET_APIC_PRI(procpri);
+}
+
+static uint8_t tpr_guest_cr8_get(void)
+{
+ uint64_t cr8;
+
+ asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8));
+
+ return cr8 & GENMASK(3, 0);
+}
+
+static void tpr_guest_check_tpr_ppr_cr8_equal(void)
+{
+ uint8_t tpr;
+
+ tpr = tpr_guest_tpr_get();
+
+ GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr);
+ GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr);
+}
+
+static void tpr_guest_code(void)
+{
+ cli();
+
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+
+ GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* TPR = 0 but IRQ masked by IF=0, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0);
+
+ sti();
+
+ /* IF=1 now, IRQ should fire */
+ while (tpr_guest_irq_sync_val_get() == 0)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ GUEST_SYNC(true);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* IRQ masked by barely high enough TPR now, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ GUEST_SYNC(false);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ /* TPR barely low enough now to unmask IRQ, should fire */
+ while (tpr_guest_irq_sync_val_get() == 1)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2);
+
+ GUEST_DONE();
+}
+
+static uint8_t lapic_tpr_get(struct kvm_lapic_state *xapic)
+{
+ return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI]));
+}
+
+static void lapic_tpr_set(struct kvm_lapic_state *xapic, uint8_t val)
+{
+ u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI];
+
+ *taskpri = SET_APIC_PRI(*taskpri, val);
+}
+
+static uint8_t sregs_tpr(struct kvm_sregs *sregs)
+{
+ return sregs->cr8 & GENMASK(3, 0);
+}
+
+static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic_state xapic;
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0);
+}
+
+static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sregs sregs;
+ struct kvm_lapic_state xapic;
+
+ vcpu_sregs_get(vcpu, &sregs);
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic));
+}
+
+static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask)
+{
+ struct kvm_lapic_state xapic;
+ uint8_t tpr;
+
+ static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number");
+ tpr = IRQ_VECTOR / 16;
+ if (!mask)
+ tpr--;
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ lapic_tpr_set(&xapic, tpr);
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+}
+
+static void test_tpr(bool __is_x2apic)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool done = false;
+
+ is_x2apic = __is_x2apic;
+
+ vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code);
+ if (is_x2apic) {
+ vm_install_exception_handler(vm, IRQ_VECTOR,
+ tpr_guest_irq_handler_x2apic);
+ } else {
+ vm_install_exception_handler(vm, IRQ_VECTOR,
+ tpr_guest_irq_handler_xapic);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ }
+
+ sync_global_to_guest(vcpu->vm, is_x2apic);
+
+ /* According to the SDM/APM the TPR value at reset is 0 */
+ test_tpr_check_tpr_zero(vcpu);
+ test_tpr_check_tpr_cr8_equal(vcpu);
+
+ tpr_guest_irq_sync_flag_reset();
+ sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val);
+
+ while (!done) {
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ test_tpr_check_tpr_cr8_equal(vcpu);
+ done = true;
+ break;
+ case UCALL_SYNC:
+ test_tpr_check_tpr_cr8_equal(vcpu);
+ test_tpr_set_tpr_for_irq(vcpu, uc.args[1]);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd);
+ break;
+ }
+ }
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can
+ * be fully hidden from the guest. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ test_tpr(false);
+ test_tpr(true);
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..d038c1571729 100644
--- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
@@ -81,13 +81,13 @@ static void guest_code(void)
vector = xsetbv_safe(0, XFEATURE_MASK_FP);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(FP), got vector '0x%x'",
- vector);
+ "Expected success on XSETBV(FP), got %s",
+ ex_str(vector));
vector = xsetbv_safe(0, supported_xcr0);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(0x%lx), got vector '0x%x'",
- supported_xcr0, vector);
+ "Expected success on XSETBV(0x%lx), got %s",
+ supported_xcr0, ex_str(vector));
for (i = 0; i < 64; i++) {
if (supported_xcr0 & BIT_ULL(i))
@@ -95,8 +95,8 @@ static void guest_code(void)
vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
- BIT_ULL(i), supported_xcr0, vector);
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s",
+ BIT_ULL(i), supported_xcr0, ex_str(vector));
}
GUEST_DONE();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index 287829f850f7..23909b501ac2 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -547,15 +547,9 @@ int main(int argc, char *argv[])
int irq_fd[2] = { -1, -1 };
if (do_eventfd_tests) {
- irq_fd[0] = eventfd(0, 0);
- irq_fd[1] = eventfd(0, 0);
+ irq_fd[0] = kvm_new_eventfd();
+ irq_fd[1] = kvm_new_eventfd();
- /* Unexpected, but not a KVM failure */
- if (irq_fd[0] == -1 || irq_fd[1] == -1)
- do_evtchn_tests = do_eventfd_tests = false;
- }
-
- if (do_eventfd_tests) {
irq_routes.info.nr = 2;
irq_routes.entries[0].gsi = 32;
@@ -572,15 +566,8 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
- struct kvm_irqfd ifd = { };
-
- ifd.fd = irq_fd[0];
- ifd.gsi = 32;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- ifd.fd = irq_fd[1];
- ifd.gsi = 33;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
+ kvm_assign_irqfd(vm, 32, irq_fd[0]);
+ kvm_assign_irqfd(vm, 33, irq_fd[1]);
struct sigaction sa = { };
sa.sa_handler = handle_alrm;