summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/kvm')
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm121
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c325
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c16
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c21
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c234
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c156
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c84
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c415
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c306
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c68
-rw-r--r--tools/testing/selftests/kvm/arm64/idreg-idst.c117
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/mmio_abort.c159
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c175
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic.c297
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c88
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c39
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c328
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c271
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c27
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c317
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c424
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c35
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_v5.c228
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c133
-rw-r--r--tools/testing/selftests/kvm/coalesced_io_test.c38
-rw-r--r--tools/testing/selftests/kvm/config1
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c10
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c49
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c593
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c11
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c438
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c22
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c6
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h54
-rw-r--r--tools/testing/selftests/kvm/include/arm64/delay.h4
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h9
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h8
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v5.h150
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h185
-rw-r--r--tools/testing/selftests/kvm/include/arm64/ucall.h4
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h23
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h92
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h548
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_types.h8
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h8
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/pmu.h66
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h235
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h51
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h30
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/riscv/arch_timer.h22
-rw-r--r--tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h35
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h37
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h4
-rw-r--r--tools/testing/selftests/kvm/include/s390/diag318_test_handler.h2
-rw-r--r--tools/testing/selftests/kvm/include/s390/facility.h4
-rw-r--r--tools/testing/selftests/kvm/include/s390/kvm_util_arch.h1
-rw-r--r--tools/testing/selftests/kvm/include/s390/ucall.h4
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h6
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h64
-rw-r--r--tools/testing/selftests/kvm/include/timer_test.h18
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h22
-rw-r--r--tools/testing/selftests/kvm/include/userfaultfd_util.h6
-rw-r--r--tools/testing/selftests/kvm/include/x86/apic.h29
-rw-r--r--tools/testing/selftests/kvm/include/x86/evmcs.h22
-rw-r--r--tools/testing/selftests/kvm/include/x86/hyperv.h28
-rw-r--r--tools/testing/selftests/kvm/include/x86/kvm_util_arch.h34
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h33
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h461
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h65
-rw-r--r--tools/testing/selftests/kvm/include/x86/smm.h16
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm.h17
-rw-r--r--tools/testing/selftests/kvm/include/x86/svm_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h85
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c143
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c4
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c28
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c57
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c12
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h25
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c106
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c30
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c373
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/ucall.c12
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c100
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c8
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c19
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c43
-rw-r--r--tools/testing/selftests/kvm/lib/guest_sprintf.c18
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c846
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S65
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c402
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/lru_gen_util.c387
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c44
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S139
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c193
-rw-r--r--tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c12
-rw-r--r--tools/testing/selftests/kvm/lib/s390/facility.c2
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c84
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c22
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c79
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c34
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c16
-rw-r--r--tools/testing/selftests/kvm/lib/x86/apic.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/hyperv.c14
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c71
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c53
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c586
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c88
-rw-r--r--tools/testing/selftests/kvm/lib/x86/svm.c43
-rw-r--r--tools/testing/selftests/kvm/lib/x86/ucall.c4
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c296
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/loongarch/pmu_test.c205
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c11
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c167
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c43
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c163
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c10
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c8
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c218
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c133
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c32
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/debug_test.c8
-rw-r--r--tools/testing/selftests/kvm/s390/irq_routing.c75
-rw-r--r--tools/testing/selftests/kvm/s390/keyop.c299
-rw-r--r--tools/testing/selftests/kvm/s390/memop.c94
-rw-r--r--tools/testing/selftests/kvm/s390/resets.c6
-rw-r--r--tools/testing/selftests/kvm/s390/shared_zeropage_test.c5
-rw-r--r--tools/testing/selftests/kvm/s390/tprot.c26
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c24
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c70
-rw-r--r--tools/testing/selftests/kvm/steal_time.c255
-rw-r--r--tools/testing/selftests/kvm/system_counter_offset_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/amx_test.c158
-rw-r--r--tools/testing/selftests/kvm/x86/aperfmperf_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86/apic_bus_clock_test.c28
-rw-r--r--tools/testing/selftests/kvm/x86/cpuid_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86/debug_regs.c4
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c150
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c209
-rw-r--r--tools/testing/selftests/kvm/x86/feature_msrs_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/fix_hypercall_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86/flds_emulation.h6
-rw-r--r--tools/testing/selftests/kvm/x86/hwcr_msr_test.c10
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_clock.c6
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c23
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_evmcs.c10
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c20
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c44
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c36
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_svm_test.c10
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c40
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_buslock_test.c135
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_clock_test.c14
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_pv_test.c10
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c117
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c)42
-rw-r--r--tools/testing/selftests/kvm/x86/nested_dirty_log_test.c293
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nested_exceptions_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_set_state_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c)128
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c)87
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c)70
-rw-r--r--tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c197
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86/platform_info_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c322
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c109
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c87
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c14
-rw-r--r--tools/testing/selftests/kvm/x86/set_boot_cpu_id.c6
-rw-r--r--tools/testing/selftests/kvm/x86/set_sregs_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c19
-rw-r--r--tools/testing/selftests/kvm/x86/sev_migrate_tests.c2
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c113
-rw-r--r--tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/smm_test.c35
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c49
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c145
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c55
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c14
-rw-r--r--tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c176
-rw-r--r--tools/testing/selftests/kvm/x86/svm_vmcall_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/sync_regs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/triple_fault_event_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_msrs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/tsc_scaling_sync.c4
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c47
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c66
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apic_access_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c155
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c179
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_msrs_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c19
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c85
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c30
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_tpr_test.c276
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c20
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c48
-rw-r--r--tools/testing/selftests/kvm/x86/xss_msr_test.c2
225 files changed, 15417 insertions, 5066 deletions
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 20af35a91d6f..f2b223072b62 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -3,10 +3,10 @@ top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
- ARCH := x86
+ override ARCH := x86
endif
include Makefile.kvm
else
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4277b983cace..9118a5a51b89 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -8,6 +8,7 @@ LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
LIBKVM += lib/memstress.c
LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
@@ -47,15 +48,34 @@ LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+TEST_GEN_PROGS_COMMON += memslot_modification_stress_test
+TEST_GEN_PROGS_COMMON += memslot_perf_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
@@ -68,8 +88,18 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/msrs_test
+TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test
+TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test
+TEST_GEN_PROGS_x86 += x86/nested_set_state_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
@@ -83,25 +113,26 @@ TEST_GEN_PROGS_x86 += x86/state_test
TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_clear_efer_svme
TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_vmcb12_gpa
+TEST_GEN_PROGS_x86 += x86/svm_lbr_nested_state
TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
TEST_GEN_PROGS_x86 += x86/sync_regs_test
TEST_GEN_PROGS_x86 += x86/ucna_injection_test
TEST_GEN_PROGS_x86 += x86/userspace_io_test
TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
-TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test
TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test
TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xapic_tpr_test
TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
TEST_GEN_PROGS_x86 += x86/xss_msr_test
TEST_GEN_PROGS_x86 += x86/debug_regs
@@ -116,63 +147,56 @@ TEST_GEN_PROGS_x86 += x86/amx_test
TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += x86/aperfmperf_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
-TEST_GEN_PROGS_x86 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hello_el2
+TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
-TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
TEST_GEN_PROGS_arm64 += arm64/vgic_init
TEST_GEN_PROGS_arm64 += arm64/vgic_irq
TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vgic_v5
TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
-TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += arm64/no-vgic
+TEST_GEN_PROGS_arm64 += arm64/idreg-idst
+TEST_GEN_PROGS_arm64 += arm64/kvm-uuid
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
-TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
-TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += guest_memfd_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -181,29 +205,40 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += s390/user_operexec
+TEST_GEN_PROGS_s390 += s390/keyop
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_s390 += s390/irq_routing
+TEST_GEN_PROGS_s390 += mmu_stress_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += access_tracking_perf_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += dirty_log_perf_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += mmu_stress_test
+TEST_GEN_PROGS_riscv += rseq_test
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch = loongarch/pmu_test
+TEST_GEN_PROGS_loongarch += arch_timer
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+TEST_GEN_PROGS_loongarch += steal_time
+
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
@@ -218,6 +253,7 @@ OVERRIDE_TARGETS = 1
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -225,6 +261,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -U_FORTIFY_SOURCE \
-fno-builtin-memcmp -fno-builtin-memcpy \
-fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
@@ -271,7 +308,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..4415c94b2866 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -7,9 +7,11 @@
* This test measures the performance effects of KVM's access tracking.
* Access tracking is driven by the MMU notifiers test_young, clear_young, and
* clear_flush_young. These notifiers do not have a direct userspace API,
- * however the clear_young notifier can be triggered by marking a pages as idle
- * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
- * enable access tracking on guest memory.
+ * however the clear_young notifier can be triggered either by
+ * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
*
* To measure performance this test runs a VM with a configurable number of
* vCPUs that each touch every page in disjoint regions of memory. Performance
@@ -17,10 +19,11 @@
* predefined region.
*
* Note that a deterministic correctness test of access tracking is not possible
- * by using page_idle as it exists today. This is for a few reasons:
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
*
- * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
- * means subsequent guest accesses are not guaranteed to see page table
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
* updates made by KVM until some time in the future.
*
* 2. page_idle only operates on LRU pages. Newly allocated pages are not
@@ -38,19 +41,28 @@
#include <inttypes.h>
#include <limits.h>
#include <pthread.h>
-#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include "kvm_syscalls.h"
#include "kvm_util.h"
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
+
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
/* Global variable used to synchronize all of the vCPU threads. */
static int iteration;
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
/* Defines what vCPU threads should do during a given iteration. */
static enum {
/* Run the vCPU to access all its memory. */
@@ -65,20 +77,39 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
/* Whether to overlap the regions of memory vCPUs access. */
static bool overlap_memory_access;
+/*
+ * If the test should only warn if there are too many idle pages (i.e., it is
+ * expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
struct test_params {
/* The backing source for the region of memory. */
enum vm_mem_backing_src_type backing_src;
/* The amount of memory to allocate for each vCPU. */
- uint64_t vcpu_memory_bytes;
+ u64 vcpu_memory_bytes;
/* The number of vCPUs to create in the VM. */
int nr_vcpus;
};
-static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+static u64 pread_u64(int fd, const char *filename, u64 index)
{
- uint64_t value;
+ u64 value;
off_t offset = index * sizeof(value);
TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
@@ -92,13 +123,13 @@ static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
#define PAGEMAP_PRESENT (1ULL << 63)
#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
-static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+static u64 lookup_pfn(int pagemap_fd, struct kvm_vm *vm, gva_t gva)
{
- uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
- uint64_t entry;
- uint64_t pfn;
+ u64 hva = (u64)addr_gva2hva(vm, gva);
+ u64 entry;
+ u64 pfn;
- entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+ entry = pread_u64(pagemap_fd, "pagemap", hva / getpagesize());
if (!(entry & PAGEMAP_PRESENT))
return 0;
@@ -108,30 +139,46 @@ static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
return pfn;
}
-static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+static bool is_page_idle(int page_idle_fd, u64 pfn)
{
- uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+ u64 bits = pread_u64(page_idle_fd, "page_idle", pfn / 64);
return !!((bits >> (pfn % 64)) & 1);
}
-static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+static void mark_page_idle(int page_idle_fd, u64 pfn)
{
- uint64_t bits = 1ULL << (pfn % 64);
+ u64 bits = 1ULL << (pfn % 64);
TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm,
- struct memstress_vcpu_args *vcpu_args)
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
{
int vcpu_idx = vcpu_args->vcpu_idx;
- uint64_t base_gva = vcpu_args->gva;
- uint64_t pages = vcpu_args->pages;
- uint64_t page;
- uint64_t still_idle = 0;
- uint64_t no_pfn = 0;
+ gva_t base_gva = vcpu_args->gva;
+ u64 pages = vcpu_args->pages;
+ u64 page;
+ u64 still_idle = 0;
+ u64 no_pfn = 0;
int page_idle_fd;
int pagemap_fd;
@@ -146,8 +193,8 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
for (page = 0; page < pages; page++) {
- uint64_t gva = base_gva + page * memstress_args.guest_page_size;
- uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+ gva_t gva = base_gva + page * memstress_args.guest_page_size;
+ u64 pfn = lookup_pfn(pagemap_fd, vm, gva);
if (!pfn) {
no_pfn++;
@@ -177,31 +224,83 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
- *
- * When running the guest as a nested VM, "warn" instead of asserting
- * as the TLB size is effectively unlimited and the KVM doesn't
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
- * are cached and the guest won't see the "idle" bit cleared.
*/
- if (still_idle >= pages / 10) {
-#ifdef __x86_64__
- TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
- "vCPU%d: Too many pages still idle (%lu out of %lu)",
- vcpu_idx, still_idle, pages);
-#endif
- printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
- "this will affect performance results.\n",
- vcpu_idx, still_idle, pages);
- }
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
close(page_idle_fd);
close(pagemap_fd);
}
-static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
+static void assert_ucall(struct kvm_vcpu *vcpu, u64 expected_ucall)
{
struct ucall uc;
- uint64_t actual_ucall = get_ucall(vcpu, &uc);
+ u64 actual_ucall = get_ucall(vcpu, &uc);
TEST_ASSERT(expected_ucall == actual_ucall,
"Guest exited unexpectedly (expected ucall %" PRIu64
@@ -237,9 +336,9 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_args);
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
break;
- };
+ }
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
@@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus,
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
/*
* Even though this parallelizes the work across vCPUs, this is still a
* very slow operation because page_idle forces the test to mark one pfn
- * at a time and the clear_young notifier serializes on the KVM MMU
+ * at a time and the clear_young notifier may serialize on the KVM MMU
* lock.
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, nr_vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
}
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (u64)getpagesize());
+
memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
/* As a control, read and write to the populated memory first. */
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
@@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memstress_destroy_vm(vm);
}
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
static void help(char *name)
{
puts("");
@@ -342,11 +502,22 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
backing_src_help("-s");
puts("");
exit(0);
}
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
int main(int argc, char *argv[])
{
struct test_params params = {
@@ -354,12 +525,13 @@ int main(int argc, char *argv[])
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.nr_vcpus = 1,
};
+ char *new_cg = NULL;
int page_idle_fd;
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -376,6 +548,11 @@ int main(int argc, char *argv[])
case 's':
params.backing_src = parse_backing_src_type(optarg);
break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -383,12 +560,50 @@ int main(int argc, char *argv[])
}
}
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "CONFIG_IDLE_PAGE_TRACKING is not enabled");
- close(page_idle_fd);
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
- for_each_guest_mode(run_test, &params);
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR,
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index acb2cb596332..90c475a61b22 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -78,9 +78,9 @@ static void *test_vcpu_run(void *arg)
return NULL;
}
-static uint32_t test_get_pcpu(void)
+static u32 test_get_pcpu(void)
{
- uint32_t pcpu;
+ u32 pcpu;
unsigned int nproc_conf;
cpu_set_t online_cpuset;
@@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void)
static int test_migrate_vcpu(unsigned int vcpu_idx)
{
int ret;
- cpu_set_t cpuset;
- uint32_t new_pcpu = test_get_pcpu();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
+ u32 new_pcpu = test_get_pcpu();
pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
- ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
- sizeof(cpuset), &cpuset);
+ ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu);
/* Allow the error where the vCPU thread is already finished */
TEST_ASSERT(ret == 0 || ret == ESRCH,
diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index cef8f7323ceb..8a019cbaf4c4 100644
--- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -66,7 +66,7 @@ static void test_guest_raz(struct kvm_vcpu *vcpu)
}
}
-static uint64_t raz_wi_reg_ids[] = {
+static u64 raz_wi_reg_ids[] = {
KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
@@ -94,8 +94,8 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
int i;
for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
- uint64_t reg_id = raz_wi_reg_ids[i];
- uint64_t val;
+ u64 reg_id = raz_wi_reg_ids[i];
+ u64 val;
val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
@@ -111,7 +111,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
}
}
-static uint64_t raz_invariant_reg_ids[] = {
+static u64 raz_invariant_reg_ids[] = {
KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
@@ -123,8 +123,8 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
int i, r;
for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
- uint64_t reg_id = raz_invariant_reg_ids[i];
- uint64_t val;
+ u64 reg_id = raz_invariant_reg_ids[i];
+ u64 val;
val = vcpu_get_reg(vcpu, reg_id);
TEST_ASSERT_EQ(val, 0);
@@ -142,11 +142,11 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
{
- uint64_t val, el0;
+ u64 val, el0;
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
return el0 == ID_AA64PFR0_EL1_EL0_IMP;
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
index eeba1cc87ff8..5fa5c0ec2b3e 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
@@ -56,7 +56,7 @@ static void guest_validate_irq(unsigned int intid,
struct test_vcpu_shared_data *shared_data)
{
enum guest_stage stage = shared_data->guest_stage;
- uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+ u64 xcnt = 0, xcnt_diff_us, cval = 0;
unsigned long xctl = 0;
unsigned int timer_irq = 0;
unsigned int accessor;
@@ -105,7 +105,7 @@ static void guest_validate_irq(unsigned int intid,
static void guest_irq_handler(struct ex_regs *regs)
{
unsigned int intid = gic_get_and_ack_irq();
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
guest_validate_irq(intid, shared_data);
@@ -116,7 +116,7 @@ static void guest_irq_handler(struct ex_regs *regs)
static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
enum guest_stage stage)
{
- uint32_t irq_iter, config_iter;
+ u32 irq_iter, config_iter;
shared_data->guest_stage = stage;
shared_data->nr_iter = 0;
@@ -140,7 +140,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
static void guest_code(void)
{
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
local_irq_disable();
@@ -165,10 +165,8 @@ static void guest_code(void)
static void test_init_timer_irq(struct kvm_vm *vm)
{
/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
-static int gic_fd;
-
struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
int nr_vcpus = test_args.nr_vcpus;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
vm_init_descriptor_tables(vm);
@@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
@@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void)
void test_vm_cleanup(struct kvm_vm *vm)
{
- close(gic_fd);
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..f7625eb711d6 100644
--- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
@@ -22,25 +22,26 @@
#include "gic.h"
#include "vgic.h"
-static const uint64_t CVAL_MAX = ~0ULL;
+/* Depends on counter width. */
+static u64 CVAL_MAX;
/* tval is a signed 32-bit int. */
-static const int32_t TVAL_MAX = INT32_MAX;
-static const int32_t TVAL_MIN = INT32_MIN;
+static const s32 TVAL_MAX = INT32_MAX;
+static const s32 TVAL_MIN = INT32_MIN;
/* After how much time we say there is no IRQ. */
-static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+static const u32 TIMEOUT_NO_IRQ_US = 50000;
-/* A nice counter value to use as the starting one for most tests. */
-static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */
+static u64 DEF_CNT;
/* Number of runs. */
-static const uint32_t NR_TEST_ITERS_DEF = 5;
+static const u32 NR_TEST_ITERS_DEF = 5;
/* Default wait test time in ms. */
-static const uint32_t WAIT_TEST_MS = 10;
+static const u32 WAIT_TEST_MS = 10;
/* Default "long" wait test time in ms. */
-static const uint32_t LONG_WAIT_TEST_MS = 100;
+static const u32 LONG_WAIT_TEST_MS = 100;
/* Shared with IRQ handler. */
struct test_vcpu_shared_data {
@@ -52,9 +53,9 @@ struct test_args {
/* Virtual or physical timer and counter tests. */
enum arch_timer timer;
/* Delay used for most timer tests. */
- uint64_t wait_ms;
+ u64 wait_ms;
/* Delay used in the test_long_timer_delays test. */
- uint64_t long_wait_ms;
+ u64 long_wait_ms;
/* Number of iterations. */
int iterations;
/* Whether to test the physical timer. */
@@ -81,12 +82,12 @@ enum sync_cmd {
NO_USERSPACE_CMD,
};
-typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+typedef void (*sleep_method_t)(enum arch_timer timer, u64 usec);
-static void sleep_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
-static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+static void sleep_poll(enum arch_timer timer, u64 usec);
+static void sleep_sched_poll(enum arch_timer timer, u64 usec);
+static void sleep_in_userspace(enum arch_timer timer, u64 usec);
+static void sleep_migrate(enum arch_timer timer, u64 usec);
sleep_method_t sleep_method[] = {
sleep_poll,
@@ -114,14 +115,14 @@ enum timer_view {
TIMER_TVAL,
};
-static void assert_irqs_handled(uint32_t n)
+static void assert_irqs_handled(u32 n)
{
int h = atomic_read(&shared_data.handled);
__GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
}
-static void userspace_cmd(uint64_t cmd)
+static void userspace_cmd(u64 cmd)
{
GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
}
@@ -131,12 +132,12 @@ static void userspace_migrate_vcpu(void)
userspace_cmd(USERSPACE_MIGRATE_SELF);
}
-static void userspace_sleep(uint64_t usecs)
+static void userspace_sleep(u64 usecs)
{
GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
}
-static void set_counter(enum arch_timer timer, uint64_t counter)
+static void set_counter(enum arch_timer timer, u64 counter)
{
GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
}
@@ -145,8 +146,8 @@ static void guest_irq_handler(struct ex_regs *regs)
{
unsigned int intid = gic_get_and_ack_irq();
enum arch_timer timer;
- uint64_t cnt, cval;
- uint32_t ctl;
+ u64 cnt, cval;
+ u32 ctl;
bool timer_condition, istatus;
if (intid == IAR_SPURIOUS) {
@@ -177,8 +178,8 @@ out:
gic_set_eoi(intid);
}
-static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
- uint32_t ctl)
+static void set_cval_irq(enum arch_timer timer, u64 cval_cycles,
+ u32 ctl)
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
@@ -186,16 +187,16 @@ static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
timer_set_ctl(timer, ctl);
}
-static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
- uint32_t ctl)
+static void set_tval_irq(enum arch_timer timer, u64 tval_cycles,
+ u32 ctl)
{
atomic_set(&shared_data.handled, 0);
atomic_set(&shared_data.spurious, 0);
- timer_set_ctl(timer, ctl);
timer_set_tval(timer, tval_cycles);
+ timer_set_ctl(timer, ctl);
}
-static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+static void set_xval_irq(enum arch_timer timer, u64 xval, u32 ctl,
enum timer_view tv)
{
switch (tv) {
@@ -274,13 +275,13 @@ static void wait_migrate_poll_for_irq(void)
* Sleep for usec microseconds by polling in the guest or in
* userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE).
*/
-static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+static void guest_poll(enum arch_timer test_timer, u64 usec,
enum sync_cmd usp_cmd)
{
- uint64_t cycles = usec_to_cycles(usec);
+ u64 cycles = usec_to_cycles(usec);
/* Whichever timer we are testing with, sleep with the other. */
enum arch_timer sleep_timer = 1 - test_timer;
- uint64_t start = timer_get_cntct(sleep_timer);
+ u64 start = timer_get_cntct(sleep_timer);
while ((timer_get_cntct(sleep_timer) - start) < cycles) {
if (usp_cmd == NO_USERSPACE_CMD)
@@ -290,22 +291,22 @@ static void guest_poll(enum arch_timer test_timer, uint64_t usec,
}
}
-static void sleep_poll(enum arch_timer timer, uint64_t usec)
+static void sleep_poll(enum arch_timer timer, u64 usec)
{
guest_poll(timer, usec, NO_USERSPACE_CMD);
}
-static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+static void sleep_sched_poll(enum arch_timer timer, u64 usec)
{
guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
}
-static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+static void sleep_migrate(enum arch_timer timer, u64 usec)
{
guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
}
-static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+static void sleep_in_userspace(enum arch_timer timer, u64 usec)
{
userspace_sleep(usec);
}
@@ -314,15 +315,15 @@ static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
* Reset the timer state to some nice values like the counter not being close
* to the edge, and the control register masked and disabled.
*/
-static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+static void reset_timer_state(enum arch_timer timer, u64 cnt)
{
set_counter(timer, cnt);
timer_set_ctl(timer, CTL_IMASK);
}
-static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+static void test_timer_xval(enum arch_timer timer, u64 xval,
enum timer_view tv, irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
+ u64 reset_cnt)
{
local_irq_disable();
@@ -347,23 +348,23 @@ static void test_timer_xval(enum arch_timer timer, uint64_t xval,
* the "runner", like: tools/testing/selftests/kselftest/runner.sh.
*/
-static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+static void test_timer_cval(enum arch_timer timer, u64 cval,
irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
+ u64 reset_cnt)
{
test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
}
-static void test_timer_tval(enum arch_timer timer, int32_t tval,
+static void test_timer_tval(enum arch_timer timer, s32 tval,
irq_wait_method_t wm, bool reset_state,
- uint64_t reset_cnt)
+ u64 reset_cnt)
{
- test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+ test_timer_xval(timer, (u64)tval, TIMER_TVAL, wm, reset_state,
reset_cnt);
}
-static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
- uint64_t usec, enum timer_view timer_view,
+static void test_xval_check_no_irq(enum arch_timer timer, u64 xval,
+ u64 usec, enum timer_view timer_view,
sleep_method_t guest_sleep)
{
local_irq_disable();
@@ -378,17 +379,17 @@ static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
assert_irqs_handled(0);
}
-static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
- uint64_t usec, sleep_method_t wm)
+static void test_cval_no_irq(enum arch_timer timer, u64 cval,
+ u64 usec, sleep_method_t wm)
{
test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
}
-static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+static void test_tval_no_irq(enum arch_timer timer, s32 tval, u64 usec,
sleep_method_t wm)
{
- /* tval will be cast to an int32_t in test_xval_check_no_irq */
- test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+ /* tval will be cast to an s32 in test_xval_check_no_irq */
+ test_xval_check_no_irq(timer, (u64)tval, usec, TIMER_TVAL, wm);
}
/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
@@ -462,7 +463,7 @@ static void test_timers_fired_multiple_times(enum arch_timer timer)
* timeout for the wait: we use the wfi instruction.
*/
static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
- int32_t delta_1_ms, int32_t delta_2_ms)
+ s32 delta_1_ms, s32 delta_2_ms)
{
local_irq_disable();
reset_timer_state(timer, DEF_CNT);
@@ -487,7 +488,7 @@ static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm
static void test_reprogram_timers(enum arch_timer timer)
{
int i;
- uint64_t base_wait = test_args.wait_ms;
+ u64 base_wait = test_args.wait_ms;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
/*
@@ -503,8 +504,8 @@ static void test_reprogram_timers(enum arch_timer timer)
static void test_basic_functionality(enum arch_timer timer)
{
- int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
- uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+ s32 tval = (s32)msec_to_cycles(test_args.wait_ms);
+ u64 cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
@@ -592,7 +593,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t
reset_timer_state(timer, DEF_CNT);
set_cval_irq(timer,
- (uint64_t) TVAL_MAX +
+ (u64)TVAL_MAX +
msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
set_counter(timer, TVAL_MAX);
@@ -607,7 +608,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t
/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
static void test_timers_above_tval_max(enum arch_timer timer)
{
- uint64_t cval;
+ u64 cval;
int i;
/*
@@ -637,8 +638,8 @@ static void test_timers_above_tval_max(enum arch_timer timer)
* sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
* then waits for an IRQ.
*/
-static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
- uint64_t xval, uint64_t cnt_2,
+static void test_set_cnt_after_xval(enum arch_timer timer, u64 cnt_1,
+ u64 xval, u64 cnt_2,
irq_wait_method_t wm, enum timer_view tv)
{
local_irq_disable();
@@ -661,8 +662,8 @@ static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
* then waits for an IRQ.
*/
static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, uint64_t xval,
- uint64_t cnt_2,
+ u64 cnt_1, u64 xval,
+ u64 cnt_2,
sleep_method_t guest_sleep,
enum timer_view tv)
{
@@ -683,31 +684,31 @@ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
timer_set_ctl(timer, CTL_IMASK);
}
-static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
- int32_t tval, uint64_t cnt_2,
+static void test_set_cnt_after_tval(enum arch_timer timer, u64 cnt_1,
+ s32 tval, u64 cnt_2,
irq_wait_method_t wm)
{
test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
}
-static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
- uint64_t cval, uint64_t cnt_2,
+static void test_set_cnt_after_cval(enum arch_timer timer, u64 cnt_1,
+ u64 cval, u64 cnt_2,
irq_wait_method_t wm)
{
test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
}
static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, int32_t tval,
- uint64_t cnt_2, sleep_method_t wm)
+ u64 cnt_1, s32 tval,
+ u64 cnt_2, sleep_method_t wm)
{
test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
TIMER_TVAL);
}
static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
- uint64_t cnt_1, uint64_t cval,
- uint64_t cnt_2, sleep_method_t wm)
+ u64 cnt_1, u64 cval,
+ u64 cnt_2, sleep_method_t wm)
{
test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
TIMER_CVAL);
@@ -717,7 +718,7 @@ static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
static void test_move_counters_ahead_of_timers(enum arch_timer timer)
{
int i;
- int32_t tval;
+ s32 tval;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
irq_wait_method_t wm = irq_wait_method[i];
@@ -729,14 +730,7 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer)
test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
tval = TVAL_MAX;
- test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
- wm);
- }
-
- for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
- sleep_method_t sm = sleep_method[i];
-
- test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+ test_set_cnt_after_tval(timer, 0, tval, (u64)tval + 1, wm);
}
}
@@ -759,8 +753,8 @@ static void test_move_counters_behind_timers(enum arch_timer timer)
static void test_timers_in_the_past(enum arch_timer timer)
{
- int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
- uint64_t cval;
+ s32 tval = -1 * (s32)msec_to_cycles(test_args.wait_ms);
+ u64 cval;
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
@@ -795,8 +789,8 @@ static void test_timers_in_the_past(enum arch_timer timer)
static void test_long_timer_delays(enum arch_timer timer)
{
- int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
- uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+ s32 tval = (s32)msec_to_cycles(test_args.long_wait_ms);
+ u64 cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
int i;
for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
@@ -849,17 +843,17 @@ static void guest_code(enum arch_timer timer)
GUEST_DONE();
}
-static uint32_t next_pcpu(void)
+static cpu_set_t default_cpuset;
+
+static u32 next_pcpu(void)
{
- uint32_t max = get_nprocs();
- uint32_t cur = sched_getcpu();
- uint32_t next = cur;
- cpu_set_t cpuset;
+ u32 max = get_nprocs();
+ u32 cur = sched_getcpu();
+ u32 next = cur;
+ cpu_set_t cpuset = default_cpuset;
TEST_ASSERT(max > 1, "Need at least two physical cpus");
- sched_getaffinity(0, sizeof(cpuset), &cpuset);
-
do {
next = (next + 1) % CPU_SETSIZE;
} while (!CPU_ISSET(next, &cpuset));
@@ -867,26 +861,7 @@ static uint32_t next_pcpu(void)
return next;
}
-static void migrate_self(uint32_t new_pcpu)
-{
- int ret;
- cpu_set_t cpuset;
- pthread_t thread;
-
- thread = pthread_self();
-
- CPU_ZERO(&cpuset);
- CPU_SET(new_pcpu, &cpuset);
-
- pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
-
- ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
-
- TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
- new_pcpu, ret);
-}
-
-static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, u64 cnt,
enum arch_timer timer)
{
if (timer == PHYSICAL)
@@ -898,7 +873,7 @@ static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
{
enum sync_cmd cmd = uc->args[1];
- uint64_t val = uc->args[2];
+ u64 val = uc->args[2];
enum arch_timer timer = uc->args[3];
switch (cmd) {
@@ -912,7 +887,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
sched_yield();
break;
case USERSPACE_MIGRATE_SELF:
- migrate_self(next_pcpu());
+ pin_self_to_cpu(next_pcpu());
break;
default:
break;
@@ -924,7 +899,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
struct ucall uc;
/* Start on CPU 0 */
- migrate_self(0);
+ pin_self_to_cpu(0);
while (true) {
vcpu_run(vcpu);
@@ -948,10 +923,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
- vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
- KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+ ptimer_irq = vcpu_get_ptimer_irq(vcpu);
+ vtimer_irq = vcpu_get_vtimer_irq(vcpu);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@@ -973,8 +946,15 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
- vgic_v3_setup(*vm, 1, 64);
+
sync_global_to_guest(*vm, test_args);
+ sync_global_to_guest(*vm, CVAL_MAX);
+ sync_global_to_guest(*vm, DEF_CNT);
+}
+
+static void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
}
static void test_print_help(char *name)
@@ -986,7 +966,7 @@ static void test_print_help(char *name)
pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
LONG_WAIT_TEST_MS);
- pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n",
WAIT_TEST_MS);
pr_info("\t-p: Test physical timer (default: true)\n");
pr_info("\t-v: Test virtual timer (default: true)\n");
@@ -1035,6 +1015,17 @@ static bool parse_args(int argc, char *argv[])
return false;
}
+static void set_counter_defaults(void)
+{
+ const u64 MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600;
+ u64 freq = read_sysreg(CNTFRQ_EL0);
+ int width = ilog2(MIN_ROLLOVER_SECS * freq);
+
+ width = clamp(width, 56, 64);
+ CVAL_MAX = GENMASK_ULL(width - 1, 0);
+ DEF_CNT = CVAL_MAX / 2;
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
@@ -1043,19 +1034,24 @@ int main(int argc, char *argv[])
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
+ sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset);
+ set_counter_defaults();
+
if (test_args.test_virtual) {
test_vm_create(&vm, &vcpu, VIRTUAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
if (test_args.test_physical) {
test_vm_create(&vm, &vcpu, PHYSICAL);
test_run(vm, vcpu);
- kvm_vm_free(vm);
+ test_vm_cleanup(vm);
}
return 0;
diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c
new file mode 100644
index 000000000000..ce5d312ef6ba
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/at.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+#define TEST_ADDR 0x80000000
+
+enum {
+ CLEAR_ACCESS_FLAG,
+};
+
+static u64 *ptep_hva;
+
+#define copy_el2_to_el1(reg) \
+ write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
+
+/* Yes, this is an ugly hack */
+#define __at(op, addr) write_sysreg_s(addr, op)
+
+#define test_at_insn(op, expect_fault) \
+do { \
+ u64 par, fsc; \
+ bool fault; \
+ \
+ GUEST_SYNC(CLEAR_ACCESS_FLAG); \
+ \
+ __at(OP_AT_##op, TEST_ADDR); \
+ isb(); \
+ par = read_sysreg(par_el1); \
+ \
+ fault = par & SYS_PAR_EL1_F; \
+ fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
+ \
+ __GUEST_ASSERT((expect_fault) == fault, \
+ "AT "#op": %sexpected fault (par: %lx)1", \
+ (expect_fault) ? "" : "un", par); \
+ if ((expect_fault)) { \
+ __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
+ "AT "#op": expected access flag fault (par: %lx)", \
+ par); \
+ } else { \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
+ GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
+ GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
+ } \
+} while (0)
+
+static void test_at(bool expect_fault)
+{
+ test_at_insn(S1E2R, expect_fault);
+ test_at_insn(S1E2W, expect_fault);
+
+ /* Reuse the stage-1 MMU context from EL2 at EL1 */
+ copy_el2_to_el1(SCTLR);
+ copy_el2_to_el1(MAIR);
+ copy_el2_to_el1(TCR);
+ copy_el2_to_el1(TTBR0);
+ copy_el2_to_el1(TTBR1);
+
+ /* Disable stage-2 translation and enter a non-host context */
+ write_sysreg(0, vtcr_el2);
+ write_sysreg(0, vttbr_el2);
+ sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
+ isb();
+
+ test_at_insn(S1E1R, expect_fault);
+ test_at_insn(S1E1W, expect_fault);
+}
+
+static void guest_code(void)
+{
+ sysreg_clear_set(tcr_el1, TCR_HA, 0);
+ isb();
+
+ test_at(true);
+
+ if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
+ GUEST_DONE();
+
+ sysreg_clear_set(tcr_el1, 0, TCR_HA);
+ isb();
+ test_at(false);
+
+ GUEST_DONE();
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ switch (uc->args[1]) {
+ case CLEAR_ACCESS_FLAG:
+ /*
+ * Delete + reinstall the memslot to invalidate stage-2
+ * mappings of the stage-1 page tables, allowing KVM to
+ * potentially use the 'slow' AT emulation path.
+ *
+ * This and clearing the access flag from host userspace
+ * ensures that the access flag cannot be set speculatively
+ * and is reliably cleared at the time of the AT instruction.
+ */
+ clear_bit(__ffs(PTE_AF), ptep_hva);
+ vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
+ break;
+ default:
+ TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
+ }
+}
+
+static void run_test(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ return;
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
+ ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
+ run_test(vcpu);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index c7fb55c9135b..3eb4b1b6682d 100644
--- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -31,14 +31,14 @@
extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
extern unsigned char iter_ss_begin, iter_ss_end;
-static volatile uint64_t sw_bp_addr, hw_bp_addr;
-static volatile uint64_t wp_addr, wp_data_addr;
-static volatile uint64_t svc_addr;
-static volatile uint64_t ss_addr[4], ss_idx;
-#define PC(v) ((uint64_t)&(v))
+static volatile u64 sw_bp_addr, hw_bp_addr;
+static volatile u64 wp_addr, wp_data_addr;
+static volatile u64 svc_addr;
+static volatile u64 ss_addr[4], ss_idx;
+#define PC(v) ((u64)&(v))
#define GEN_DEBUG_WRITE_REG(reg_name) \
-static void write_##reg_name(int num, uint64_t val) \
+static void write_##reg_name(int num, u64 val) \
{ \
switch (num) { \
case 0: \
@@ -102,8 +102,8 @@ GEN_DEBUG_WRITE_REG(dbgwvr)
static void reset_debug_state(void)
{
- uint8_t brps, wrps, i;
- uint64_t dfr0;
+ u8 brps, wrps, i;
+ u64 dfr0;
asm volatile("msr daifset, #8");
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
/* Reset all bcr/bvr/wcr/wvr registers */
dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0);
for (i = 0; i <= brps; i++) {
write_dbgbcr(i, 0);
write_dbgbvr(i, 0);
}
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0);
for (i = 0; i <= wrps; i++) {
write_dbgwcr(i, 0);
write_dbgwvr(i, 0);
@@ -140,7 +140,7 @@ static void enable_os_lock(void)
static void enable_monitor_debug_exceptions(void)
{
- uint32_t mdscr;
+ u64 mdscr;
asm volatile("msr daifclr, #8");
@@ -149,9 +149,9 @@ static void enable_monitor_debug_exceptions(void)
isb();
}
-static void install_wp(uint8_t wpn, uint64_t addr)
+static void install_wp(u8 wpn, u64 addr)
{
- uint32_t wcr;
+ u32 wcr;
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
write_dbgwcr(wpn, wcr);
@@ -162,9 +162,9 @@ static void install_wp(uint8_t wpn, uint64_t addr)
enable_monitor_debug_exceptions();
}
-static void install_hw_bp(uint8_t bpn, uint64_t addr)
+static void install_hw_bp(u8 bpn, u64 addr)
{
- uint32_t bcr;
+ u32 bcr;
bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
write_dbgbcr(bpn, bcr);
@@ -174,11 +174,10 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr)
enable_monitor_debug_exceptions();
}
-static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
- uint64_t ctx)
+static void install_wp_ctx(u8 addr_wp, u8 ctx_bp, u64 addr, u64 ctx)
{
- uint32_t wcr;
- uint64_t ctx_bcr;
+ u32 wcr;
+ u64 ctx_bcr;
/* Setup a context-aware breakpoint for Linked Context ID Match */
ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
@@ -188,7 +187,7 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
/* Setup a linked watchpoint (linked to the context-aware breakpoint) */
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
- DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+ DBGWCR_WT_LINK | ((u32)ctx_bp << DBGWCR_LBN_SHIFT);
write_dbgwcr(addr_wp, wcr);
write_dbgwvr(addr_wp, addr);
isb();
@@ -196,10 +195,9 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
enable_monitor_debug_exceptions();
}
-void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
- uint64_t ctx)
+void install_hw_bp_ctx(u8 addr_bp, u8 ctx_bp, u64 addr, u64 ctx)
{
- uint32_t addr_bcr, ctx_bcr;
+ u32 addr_bcr, ctx_bcr;
/* Setup a context-aware breakpoint for Linked Context ID Match */
ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
@@ -213,7 +211,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
*/
addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
DBGBCR_BT_ADDR_LINK_CTX |
- ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+ ((u32)ctx_bp << DBGBCR_LBN_SHIFT);
write_dbgbcr(addr_bp, addr_bcr);
write_dbgbvr(addr_bp, addr);
isb();
@@ -223,7 +221,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
static void install_ss(void)
{
- uint32_t mdscr;
+ u64 mdscr;
asm volatile("msr daifclr, #8");
@@ -234,9 +232,9 @@ static void install_ss(void)
static volatile char write_data;
-static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+static void guest_code(u8 bpn, u8 wpn, u8 ctx_bpn)
{
- uint64_t ctx = 0xabcdef; /* a random context number */
+ u64 ctx = 0xabcdef; /* a random context number */
/* Software-breakpoint */
reset_debug_state();
@@ -377,8 +375,8 @@ static void guest_svc_handler(struct ex_regs *regs)
static void guest_code_ss(int test_cnt)
{
- uint64_t i;
- uint64_t bvr, wvr, w_bvr, w_wvr;
+ u64 i;
+ u64 bvr, wvr, w_bvr, w_wvr;
for (i = 0; i < test_cnt; i++) {
/* Bits [1:0] of dbg{b,w}vr are RES0 */
@@ -416,12 +414,12 @@ static void guest_code_ss(int test_cnt)
GUEST_DONE();
}
-static int debug_version(uint64_t id_aa64dfr0)
+static int debug_version(u64 id_aa64dfr0)
{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+ return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0);
}
-static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+static void test_guest_debug_exceptions(u8 bpn, u8 wpn, u8 ctx_bpn)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -468,8 +466,8 @@ void test_single_step_from_userspace(int test_cnt)
struct kvm_vm *vm;
struct ucall uc;
struct kvm_run *run;
- uint64_t pc, cmd;
- uint64_t test_pc = 0;
+ u64 pc, cmd;
+ u64 test_pc = 0;
bool ss_enable = false;
struct kvm_guest_debug debug = {};
@@ -506,7 +504,7 @@ void test_single_step_from_userspace(int test_cnt)
"Unexpected pc 0x%lx (expected 0x%lx)",
pc, test_pc);
- if ((pc + 4) == (uint64_t)&iter_ss_end) {
+ if ((pc + 4) == (u64)&iter_ss_end) {
test_pc = 0;
debug.control = KVM_GUESTDBG_ENABLE;
ss_enable = false;
@@ -519,8 +517,8 @@ void test_single_step_from_userspace(int test_cnt)
* iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
* be the current pc + 4.
*/
- if ((pc >= (uint64_t)&iter_ss_begin) &&
- (pc < (uint64_t)&iter_ss_end))
+ if ((pc >= (u64)&iter_ss_begin) &&
+ (pc < (u64)&iter_ss_end))
test_pc = pc + 4;
else
test_pc = 0;
@@ -533,20 +531,20 @@ void test_single_step_from_userspace(int test_cnt)
* Run debug testing using the various breakpoint#, watchpoint# and
* context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
*/
-void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+void test_guest_debug_exceptions_all(u64 aa64dfr0)
{
- uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+ u8 brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
int b, w, c;
/* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1;
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
/* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+ wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1;
/* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+ ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1;
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
brp_num, wrp_num, ctx_brp_num);
@@ -580,7 +578,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
int opt;
int ss_iteration = 10000;
- uint64_t aa64dfr0;
+ u64 aa64dfr0;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
new file mode 100644
index 000000000000..d8fe17a6cc59
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * external_abort - Tests for userspace external abort injection
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed)
+
+static u64 expected_abort_pc;
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ GUEST_DONE();
+}
+
+static void unexpected_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
+}
+
+static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
+ handler_fn dabt_handler)
+{
+ struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
+
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ return vm;
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static bool vcpu_has_ras(struct kvm_vcpu *vcpu)
+{
+ u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0);
+}
+
+static bool guest_has_ras(void)
+{
+ return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1));
+}
+
+static void vcpu_inject_serror(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.serror_pending = true;
+ if (vcpu_has_ras(vcpu)) {
+ events.exception.serror_has_esr = true;
+ events.exception.serror_esr = EXPECTED_SERROR_ISS;
+ }
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ if (uc.cmd == cmd)
+ return;
+
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_DONE);
+}
+
+static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu)
+{
+ __vcpu_run_expect(vcpu, UCALL_SYNC);
+}
+
+extern char test_mmio_abort_insn;
+
+static noinline void test_mmio_abort_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
+
+ asm volatile("test_mmio_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+extern char test_mmio_nisv_insn;
+
+static void test_mmio_nisv_guest(void)
+{
+ WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
+
+ asm volatile("test_mmio_nisv_insn:\n\t"
+ "ldr x0, [%0], #8\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("MMIO instruction should not retire");
+}
+
+/*
+ * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
+ * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
+ */
+static void test_mmio_nisv(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ unexpected_dabt_handler);
+
+ TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
+ TEST_ASSERT_EQ(errno, ENOSYS);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
+ * reaches the guest.
+ */
+static void test_mmio_nisv_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
+ expect_sea_handler);
+ struct kvm_run *run = vcpu->run;
+
+ vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
+ TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void unexpected_serror_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Took unexpected SError exception");
+}
+
+static void test_serror_masked_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ isb();
+
+ GUEST_DONE();
+}
+
+static void test_serror_masked(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_serror_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR);
+ if (guest_has_ras())
+ GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS);
+
+ GUEST_DONE();
+}
+
+static void test_serror_guest(void)
+{
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void expect_sea_s1ptw_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
+
+ GUEST_DONE();
+}
+
+static noinline void test_s1ptw_abort_guest(void)
+{
+ extern char test_s1ptw_abort_insn;
+
+ WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
+
+ asm volatile("test_s1ptw_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("Load on S1PTW abort should not retire");
+}
+
+static void test_s1ptw_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ u64 *ptep, bad_pa;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
+ expect_sea_s1ptw_handler);
+
+ ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
+ bad_pa = BIT(vm->pa_bits) - vm->page_size;
+
+ *ptep &= ~GENMASK(47, 12);
+ *ptep |= bad_pa;
+
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_emulated_guest(void)
+{
+ GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
+
+ local_serror_enable();
+ GUEST_SYNC(0);
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken unmasked SError exception");
+}
+
+static void test_serror_emulated(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_mmio_ease_guest(void)
+{
+ sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+ isb();
+
+ test_mmio_abort_guest();
+}
+
+/*
+ * Test that KVM doesn't complete MMIO emulation when userspace has made an
+ * external abort pending for the instruction.
+ */
+static void test_mmio_ease(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest,
+ unexpected_dabt_handler);
+ struct kvm_run *run = vcpu->run;
+ u64 pfr1;
+
+ pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) {
+ pr_debug("Skipping %s\n", __func__);
+ return;
+ }
+
+ /*
+ * SCTLR2_ELx.EASE changes the exception vector to the SError vector but
+ * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx).
+ */
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
+
+ vcpu_inject_sea(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void test_serror_amo_guest(void)
+{
+ /*
+ * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked)
+ * since the write is redirected to memory. But don't write (intentionally)
+ * broken code!
+ */
+ sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0);
+ isb();
+
+ GUEST_SYNC(0);
+ GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A);
+
+ /*
+ * KVM treats the effective value of AMO as 1 when
+ * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when
+ * unmasked.
+ */
+ local_serror_enable();
+ isb();
+ local_serror_disable();
+
+ GUEST_FAIL("Should've taken pending SError exception");
+}
+
+static void test_serror_amo(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest,
+ unexpected_dabt_handler);
+
+ vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler);
+ vcpu_run_expect_sync(vcpu);
+ vcpu_inject_serror(vcpu);
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_mmio_abort();
+ test_mmio_nisv();
+ test_mmio_nisv_abort();
+ test_serror();
+ test_serror_masked();
+ test_serror_emulated();
+ test_mmio_ease();
+ test_s1ptw_abort();
+
+ if (!test_supports_el2())
+ return 0;
+
+ test_serror_amo();
+}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d43fb3f49050..0a3a94c4cca1 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -15,6 +15,12 @@
#include "test_util.h"
#include "processor.h"
+#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \
+ sys_reg_Op1(SYS_ ## r), \
+ sys_reg_CRn(SYS_ ## r), \
+ sys_reg_CRm(SYS_ ## r), \
+ sys_reg_Op2(SYS_ ## r))
+
struct feature_id_reg {
__u64 reg;
__u64 id_reg;
@@ -22,37 +28,48 @@ struct feature_id_reg {
__u64 feat_min;
};
-static struct feature_id_reg feat_id_regs[] = {
- {
- ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 0,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 8,
- 1
- },
- {
- ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
- },
- {
- ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
- ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 16,
- 1
+#define FEAT(id, f, v) \
+ .id_reg = SYS_REG(id), \
+ .feat_shift = id ## _ ## f ## _SHIFT, \
+ .feat_min = id ## _ ## f ## _ ## v
+
+#define REG_FEAT(r, id, f, v) \
+ { \
+ .reg = SYS_REG(r), \
+ FEAT(id, f, v) \
}
+
+static struct feature_id_reg feat_id_regs[] = {
+ REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP),
+ REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP),
+ REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP),
+ REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP),
+ REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP),
+ REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
+ REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP),
+ REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP),
+ REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY),
+ REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP),
+ REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP),
};
bool filter_reg(__u64 reg)
@@ -332,9 +349,21 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
- ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
- ARM64_SYS_REG(3, 3, 14, 0, 2),
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
+
+ /*
+ * EL0 Virtual Timer Registers
+ *
+ * WARNING:
+ * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined
+ * with the appropriate register encodings. Their values have been
+ * accidentally swapped. As this is set API, the definitions here
+ * must be used, rather than ones derived from the encodings.
+ */
+ KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0),
+ KVM_REG_ARM_TIMER_CVAL,
+ KVM_REG_ARM_TIMER_CNT,
+
ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
@@ -468,6 +497,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1),
ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
@@ -685,6 +715,67 @@ static __u64 pauth_generic_regs[] = {
ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
};
+static __u64 el2_regs[] = {
+ SYS_REG(VPIDR_EL2),
+ SYS_REG(VMPIDR_EL2),
+ SYS_REG(SCTLR_EL2),
+ SYS_REG(ACTLR_EL2),
+ SYS_REG(SCTLR2_EL2),
+ SYS_REG(HCR_EL2),
+ SYS_REG(MDCR_EL2),
+ SYS_REG(CPTR_EL2),
+ SYS_REG(HSTR_EL2),
+ SYS_REG(HFGRTR_EL2),
+ SYS_REG(HFGWTR_EL2),
+ SYS_REG(HFGITR_EL2),
+ SYS_REG(HACR_EL2),
+ SYS_REG(ZCR_EL2),
+ SYS_REG(HCRX_EL2),
+ SYS_REG(TTBR0_EL2),
+ SYS_REG(TTBR1_EL2),
+ SYS_REG(TCR_EL2),
+ SYS_REG(TCR2_EL2),
+ SYS_REG(VTTBR_EL2),
+ SYS_REG(VTCR_EL2),
+ SYS_REG(VNCR_EL2),
+ SYS_REG(HDFGRTR2_EL2),
+ SYS_REG(HDFGWTR2_EL2),
+ SYS_REG(HFGRTR2_EL2),
+ SYS_REG(HFGWTR2_EL2),
+ SYS_REG(HDFGRTR_EL2),
+ SYS_REG(HDFGWTR_EL2),
+ SYS_REG(HAFGRTR_EL2),
+ SYS_REG(HFGITR2_EL2),
+ SYS_REG(SPSR_EL2),
+ SYS_REG(ELR_EL2),
+ SYS_REG(AFSR0_EL2),
+ SYS_REG(AFSR1_EL2),
+ SYS_REG(ESR_EL2),
+ SYS_REG(FAR_EL2),
+ SYS_REG(HPFAR_EL2),
+ SYS_REG(MAIR_EL2),
+ SYS_REG(PIRE0_EL2),
+ SYS_REG(PIR_EL2),
+ SYS_REG(POR_EL2),
+ SYS_REG(AMAIR_EL2),
+ SYS_REG(VBAR_EL2),
+ SYS_REG(CONTEXTIDR_EL2),
+ SYS_REG(TPIDR_EL2),
+ SYS_REG(CNTVOFF_EL2),
+ SYS_REG(CNTHCTL_EL2),
+ SYS_REG(CNTHP_CTL_EL2),
+ SYS_REG(CNTHP_CVAL_EL2),
+ SYS_REG(CNTHV_CTL_EL2),
+ SYS_REG(CNTHV_CVAL_EL2),
+ SYS_REG(SP_EL2),
+ SYS_REG(VDISR_EL2),
+ SYS_REG(VSESR_EL2),
+};
+
+static __u64 el2_e2h0_regs[] = {
+ /* Empty */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -711,6 +802,23 @@ static __u64 pauth_generic_regs[] = {
.regs = pauth_generic_regs, \
.regs_n = ARRAY_SIZE(pauth_generic_regs), \
}
+#define EL2_SUBLIST \
+ { \
+ .name = "EL2", \
+ .capability = KVM_CAP_ARM_EL2, \
+ .feature = KVM_ARM_VCPU_HAS_EL2, \
+ .regs = el2_regs, \
+ .regs_n = ARRAY_SIZE(el2_regs), \
+ }
+#define EL2_E2H0_SUBLIST \
+ EL2_SUBLIST, \
+ { \
+ .name = "EL2 E2H0", \
+ .capability = KVM_CAP_ARM_EL2_E2H0, \
+ .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \
+ .regs = el2_e2h0_regs, \
+ .regs_n = ARRAY_SIZE(el2_e2h0_regs), \
+ }
static struct vcpu_reg_list vregs_config = {
.sublists = {
@@ -760,6 +868,124 @@ static struct vcpu_reg_list pauth_pmu_config = {
},
};
+static struct vcpu_reg_list el2_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ EL2_E2H0_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
@@ -767,5 +993,19 @@ struct vcpu_reg_list *vcpu_configs[] = {
&sve_pmu_config,
&pauth_config,
&pauth_pmu_config,
+
+ &el2_vregs_config,
+ &el2_vregs_pmu_config,
+ &el2_sve_config,
+ &el2_sve_pmu_config,
+ &el2_pauth_config,
+ &el2_pauth_pmu_config,
+
+ &el2_e2h0_vregs_config,
+ &el2_e2h0_vregs_pmu_config,
+ &el2_e2h0_sve_config,
+ &el2_e2h0_sve_pmu_config,
+ &el2_e2h0_pauth_config,
+ &el2_e2h0_pauth_pmu_config,
};
int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c
new file mode 100644
index 000000000000..bbe6862c6ab1
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/hello_el2.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1
+ *
+ * Copyright 2025 Google LLC
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "ucall.h"
+
+#include <asm/sysreg.h>
+
+static void guest_code(void)
+{
+ u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);
+ u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
+ u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
+ u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4);
+
+ GUEST_ASSERT_EQ(get_current_el(), 2);
+ GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H);
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1),
+ ID_AA64MMFR1_EL1_VH_IMP);
+
+ /*
+ * Traps of the complete ID register space are IMPDEF without FEAT_FGT,
+ * which is really annoying to deal with in KVM describing E2H as RES1.
+ *
+ * If the implementation doesn't honor the trap then expect the register
+ * to return all zeros.
+ */
+ if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP)
+ GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0),
+ ID_AA64MMFR0_EL1_FGT_NI);
+ else
+ GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
+
+ vm = vm_create(1);
+
+ kvm_get_default_vcpu_target(vm, &init);
+ init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+ vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index ec54ec7726e9..5d96cdf382c4 100644
--- a/tools/testing/selftests/kvm/arm64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -21,22 +21,31 @@
#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
struct kvm_fw_reg_info {
- uint64_t reg; /* Register definition */
- uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ u64 reg; /* Register definition */
+ u64 max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ u64 reset_val; /* Reset value for the register */
};
#define FW_REG_INFO(r) \
{ \
.reg = r, \
.max_feat_bit = r##_BIT_MAX, \
+ .reset_val = r##_RESET_VAL \
}
static const struct kvm_fw_reg_info fw_reg_info[] = {
FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
};
enum test_stage {
@@ -50,8 +59,8 @@ enum test_stage {
static int stage = TEST_STAGE_REG_IFACE;
struct test_hvc_info {
- uint32_t func_id;
- uint64_t arg1;
+ u32 func_id;
+ u64 arg1;
};
#define TEST_HVC_INFO(f, a1) \
@@ -99,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
memset(&res, 0, sizeof(res));
- smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
switch (stage) {
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
@@ -143,9 +152,9 @@ static void guest_code(void)
}
struct st_time {
- uint32_t rev;
- uint32_t attr;
- uint64_t st_time;
+ u32 rev;
+ u32 attr;
+ u64 st_time;
};
#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
@@ -153,7 +162,7 @@ struct st_time {
static void steal_time_init(struct kvm_vcpu *vcpu)
{
- uint64_t st_ipa = (ulong)ST_GPA_BASE;
+ u64 st_ipa = (ulong)ST_GPA_BASE;
unsigned int gpages;
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
@@ -165,28 +174,45 @@ static void steal_time_init(struct kvm_vcpu *vcpu)
static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
{
- uint64_t val;
+ u64 val;
unsigned int i;
int ret;
for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+ u64 set_val;
- /* First 'read' should be an upper limit of the features supported */
+ /* First 'read' should be the reset value for the reg */
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
- reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+ TEST_ASSERT(val == reg_info->reset_val,
+ "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, reg_info->reset_val, val);
+
+ if (reg_info->reset_val)
+ set_val = 0;
+ else
+ set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
- /* Test a 'write' by disabling all the features of the register map */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
TEST_ASSERT(ret == 0,
- "Failed to clear all the features of reg: 0x%lx; ret: %d",
- reg_info->reg, errno);
+ "Failed to %s all the features of reg: 0x%lx; ret: %d",
+ (set_val ? "set" : "clear"), reg_info->reg, errno);
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+ TEST_ASSERT(val == set_val,
+ "Expected all the features to be %s for reg: 0x%lx",
+ (set_val ? "set" : "cleared"), reg_info->reg);
+
+ /*
+ * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+ * expects it to be cleared.
+ */
+ if (set_val) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+ }
/*
* Test enabling a feature that's not supported.
@@ -203,7 +229,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
{
- uint64_t val;
+ u64 val;
unsigned int i;
int ret;
diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c
new file mode 100644
index 000000000000..a3e84701d814
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Access all FEAT_IDST-handled registers that depend on more than
+ * just FEAT_AA64, and fail if we don't get an a trap with an 0x18 EC.
+ */
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool sys64, undef;
+
+#define __check_sr_read(r) \
+ ({ \
+ u64 val; \
+ \
+ sys64 = false; \
+ undef = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(!undef, #r " unexpected UNDEF"); \
+ __GUEST_ASSERT(sys64, #r " didn't trap"); \
+ } while(0)
+
+
+static void guest_code(void)
+{
+ check_sr_read(CCSIDR2_EL1);
+ check_sr_read(SMIDR_EL1);
+ check_sr_read(GMID_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_sys64_handler(struct ex_regs *regs)
+{
+ sys64 = true;
+ undef = false;
+ regs->pc += 4;
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ sys64 = false;
+ undef = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_feat_idst(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* This VM has no MTE, no SME, no CCIDX */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_SYS64, guest_sys64_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u64 mmfr2;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ mmfr2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR2_EL1));
+ __TEST_REQUIRE(FIELD_GET(ID_AA64MMFR2_EL1_IDS, mmfr2) > 0,
+ "FEAT_IDST not supported");
+ kvm_vm_free(vm);
+
+ test_guest_feat_idst();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
new file mode 100644
index 000000000000..b5be9133535a
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that nobody has tampered with KVM's UID
+
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+/*
+ * Do NOT redefine these constants, or try to replace them with some
+ * "common" version. They are hardcoded here to detect any potential
+ * breakage happening in the rest of the kernel.
+ *
+ * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74
+ */
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U
+#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU
+
+static void guest_code(void)
+{
+ struct arm_smccc_res res = {};
+
+ do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
+ res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&
+ res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 &&
+ res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3,
+ "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3);
+ GUEST_DONE();
+}
+
+int main (int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
deleted file mode 100644
index 8b7a80a51b1c..000000000000
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * mmio_abort - Tests for userspace MMIO abort injection
- *
- * Copyright (c) 2024 Google LLC
- */
-#include "processor.h"
-#include "test_util.h"
-
-#define MMIO_ADDR 0x8000000ULL
-
-static u64 expected_abort_pc;
-
-static void expect_sea_handler(struct ex_regs *regs)
-{
- u64 esr = read_sysreg(esr_el1);
-
- GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
- GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
- GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
-
- GUEST_DONE();
-}
-
-static void unexpected_dabt_handler(struct ex_regs *regs)
-{
- GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc);
-}
-
-static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code,
- handler_fn dabt_handler)
-{
- struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler);
-
- virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
-
- return vm;
-}
-
-static void vcpu_inject_extabt(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events = {};
-
- events.exception.ext_dabt_pending = true;
- vcpu_events_set(vcpu, &events);
-}
-
-static void vcpu_run_expect_done(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vcpu_run(vcpu);
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
- }
-}
-
-extern char test_mmio_abort_insn;
-
-static void test_mmio_abort_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn);
-
- asm volatile("test_mmio_abort_insn:\n\t"
- "ldr x0, [%0]\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that KVM doesn't complete MMIO emulation when userspace has made an
- * external abort pending for the instruction.
- */
-static void test_mmio_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
- TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
- TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long));
- TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read");
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-extern char test_mmio_nisv_insn;
-
-static void test_mmio_nisv_guest(void)
-{
- WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn);
-
- asm volatile("test_mmio_nisv_insn:\n\t"
- "ldr x0, [%0], #8\n\t"
- : : "r" (MMIO_ADDR) : "x0", "memory");
-
- GUEST_FAIL("MMIO instruction should not retire");
-}
-
-/*
- * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace
- * hasn't enabled KVM_CAP_ARM_NISV_TO_USER.
- */
-static void test_mmio_nisv(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- unexpected_dabt_handler);
-
- TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN");
- TEST_ASSERT_EQ(errno, ENOSYS);
-
- kvm_vm_free(vm);
-}
-
-/*
- * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA
- * reaches the guest.
- */
-static void test_mmio_nisv_abort(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest,
- expect_sea_handler);
- struct kvm_run *run = vcpu->run;
-
- vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1);
-
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV);
- TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR);
-
- vcpu_inject_extabt(vcpu);
- vcpu_run_expect_done(vcpu);
- kvm_vm_free(vm);
-}
-
-int main(void)
-{
- test_mmio_abort();
- test_mmio_nisv();
- test_mmio_nisv_abort();
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
deleted file mode 100644
index ebd70430c89d..000000000000
--- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
+++ /dev/null
@@ -1,175 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Check that, on a GICv3 system, not configuring GICv3 correctly
-// results in all of the sysregs generating an UNDEF exception.
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-static volatile bool handled;
-
-#define __check_sr_read(r) \
- ({ \
- uint64_t val; \
- \
- handled = false; \
- dsb(sy); \
- val = read_sysreg_s(SYS_ ## r); \
- val; \
- })
-
-#define __check_sr_write(r) \
- do { \
- handled = false; \
- dsb(sy); \
- write_sysreg_s(0, SYS_ ## r); \
- isb(); \
- } while(0)
-
-/* Fatal checks */
-#define check_sr_read(r) \
- do { \
- __check_sr_read(r); \
- __GUEST_ASSERT(handled, #r " no read trap"); \
- } while(0)
-
-#define check_sr_write(r) \
- do { \
- __check_sr_write(r); \
- __GUEST_ASSERT(handled, #r " no write trap"); \
- } while(0)
-
-#define check_sr_rw(r) \
- do { \
- check_sr_read(r); \
- check_sr_write(r); \
- } while(0)
-
-static void guest_code(void)
-{
- uint64_t val;
-
- /*
- * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
- * hidden the feature at runtime without any other userspace action.
- */
- __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
- read_sysreg(id_aa64pfr0_el1)) == 0,
- "GICv3 wrongly advertised");
-
- /*
- * Access all GICv3 registers, and fail if we don't get an UNDEF.
- * Note that we happily access all the APxRn registers without
- * checking their existance, as all we want to see is a failure.
- */
- check_sr_rw(ICC_PMR_EL1);
- check_sr_read(ICC_IAR0_EL1);
- check_sr_write(ICC_EOIR0_EL1);
- check_sr_rw(ICC_HPPIR0_EL1);
- check_sr_rw(ICC_BPR0_EL1);
- check_sr_rw(ICC_AP0R0_EL1);
- check_sr_rw(ICC_AP0R1_EL1);
- check_sr_rw(ICC_AP0R2_EL1);
- check_sr_rw(ICC_AP0R3_EL1);
- check_sr_rw(ICC_AP1R0_EL1);
- check_sr_rw(ICC_AP1R1_EL1);
- check_sr_rw(ICC_AP1R2_EL1);
- check_sr_rw(ICC_AP1R3_EL1);
- check_sr_write(ICC_DIR_EL1);
- check_sr_read(ICC_RPR_EL1);
- check_sr_write(ICC_SGI1R_EL1);
- check_sr_write(ICC_ASGI1R_EL1);
- check_sr_write(ICC_SGI0R_EL1);
- check_sr_read(ICC_IAR1_EL1);
- check_sr_write(ICC_EOIR1_EL1);
- check_sr_rw(ICC_HPPIR1_EL1);
- check_sr_rw(ICC_BPR1_EL1);
- check_sr_rw(ICC_CTLR_EL1);
- check_sr_rw(ICC_IGRPEN0_EL1);
- check_sr_rw(ICC_IGRPEN1_EL1);
-
- /*
- * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
- * be RAO/WI. Engage in non-fatal accesses, starting with a
- * write of 0 to try and disable SRE, and let's see if it
- * sticks.
- */
- __check_sr_write(ICC_SRE_EL1);
- if (!handled)
- GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
-
- val = __check_sr_read(ICC_SRE_EL1);
- if (!handled) {
- __GUEST_ASSERT((val & BIT(0)),
- "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
- GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
- }
-
- GUEST_DONE();
-}
-
-static void guest_undef_handler(struct ex_regs *regs)
-{
- /* Success, we've gracefully exploded! */
- handled = true;
- regs->pc += 4;
-}
-
-static void test_run_vcpu(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- do {
- vcpu_run(vcpu);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- break;
- case UCALL_PRINTF:
- printf("%s", uc.buffer);
- break;
- case UCALL_DONE:
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- } while (uc.cmd != UCALL_DONE);
-}
-
-static void test_guest_no_gicv3(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- /* Create a VM without a GICv3 */
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
- vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
- ESR_ELx_EC_UNKNOWN, guest_undef_handler);
-
- test_run_vcpu(vcpu);
-
- kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- uint64_t pfr0;
-
- vm = vm_create_with_one_vcpu(&vcpu, NULL);
- pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
- "GICv3 not supported.");
- kvm_vm_free(vm);
-
- test_guest_no_gicv3();
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c
new file mode 100644
index 000000000000..25b2e3222f68
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/no-vgic.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3-capable system (GICv3 native, or GICv5 with
+// FEAT_GCIE_LEGACY), not configuring GICv3 correctly results in all
+// of the sysregs generating an UNDEF exception. Do the same for GICv5
+// on a GICv5 host.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#include <arm64/gic_v5.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ u64 val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while (0)
+
+#define __check_gicv5_gicr_op(r) \
+ ({ \
+ u64 val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(GICV5_OP_GICR_ ## r); \
+ val; \
+ })
+
+#define __check_gicv5_gic_op(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, GICV5_OP_GIC_ ## r); \
+ isb(); \
+ } while (0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while (0)
+
+#define check_gicv5_gicr_op(r) \
+ do { \
+ __check_gicv5_gicr_op(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while (0)
+
+#define check_gicv5_gic_op(r) \
+ do { \
+ __check_gicv5_gic_op(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while (0)
+
+static void guest_code_gicv3(void)
+{
+ u64 val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC,
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existence, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_gicv5(void)
+{
+ /*
+ * Check that we advertise that ID_AA64PFR2_EL1.GCIE == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ID_AA64PFR2_EL1_GCIE,
+ read_sysreg_s(SYS_ID_AA64PFR2_EL1)) == 0,
+ "GICv5 wrongly advertised");
+
+ /*
+ * Try all GICv5 instructions, and fail if we don't get an UNDEF.
+ */
+ check_gicv5_gic_op(CDAFF);
+ check_gicv5_gic_op(CDDI);
+ check_gicv5_gic_op(CDDIS);
+ check_gicv5_gic_op(CDEOI);
+ check_gicv5_gic_op(CDHM);
+ check_gicv5_gic_op(CDPEND);
+ check_gicv5_gic_op(CDPRI);
+ check_gicv5_gic_op(CDRCFG);
+ check_gicv5_gicr_op(CDIA);
+ check_gicv5_gicr_op(CDNMIA);
+
+ /* Check General System Register acccesses */
+ check_sr_rw(ICC_APR_EL1);
+ check_sr_rw(ICC_CR0_EL1);
+ check_sr_read(ICC_HPPIR_EL1);
+ check_sr_read(ICC_IAFFIDR_EL1);
+ check_sr_rw(ICC_ICSR_EL1);
+ check_sr_read(ICC_IDR0_EL1);
+ check_sr_rw(ICC_PCR_EL1);
+
+ /* Check PPI System Register accessess */
+ check_sr_rw(ICC_PPI_CACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_CACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER0_EL1);
+ check_sr_rw(ICC_PPI_SACTIVER1_EL1);
+ check_sr_rw(ICC_PPI_CPENDR0_EL1);
+ check_sr_rw(ICC_PPI_CPENDR1_EL1);
+ check_sr_rw(ICC_PPI_SPENDR0_EL1);
+ check_sr_rw(ICC_PPI_SPENDR1_EL1);
+ check_sr_rw(ICC_PPI_ENABLER0_EL1);
+ check_sr_rw(ICC_PPI_ENABLER1_EL1);
+ check_sr_read(ICC_PPI_HMR0_EL1);
+ check_sr_read(ICC_PPI_HMR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR0_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR1_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR2_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR3_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR4_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR5_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR6_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR7_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR8_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR9_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR10_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR11_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR12_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR13_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR14_EL1);
+ check_sr_rw(ICC_PPI_PRIORITYR15_EL1);
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_vgic(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GIC */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_ELx_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool has_v3, has_v5;
+ u64 pfr;
+
+ test_disable_default_vgic();
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
+ has_v3 = !!FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr);
+
+ pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR2_EL1));
+ has_v5 = !!FIELD_GET(ID_AA64PFR2_EL1_GCIE, pfr);
+
+ kvm_vm_free(vm);
+
+ __TEST_REQUIRE(has_v3 || has_v5,
+ "Neither GICv3 nor GICv5 supported.");
+
+ if (has_v3) {
+ pr_info("Testing no-vgic-v3\n");
+ test_guest_no_vgic(guest_code_gicv3);
+ } else {
+ pr_info("No GICv3 support: skipping no-vgic-v3 test\n");
+ }
+
+ if (has_v5) {
+ pr_info("Testing no-vgic-v5\n");
+ test_guest_no_vgic(guest_code_gicv5);
+ } else {
+ pr_info("No GICv5 support: skipping no-vgic-v5 test\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..6bb3d82906b2 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -23,7 +23,7 @@
#define TEST_PTE_GVA 0xb0000000
#define TEST_DATA 0x0123456789ABCDEF
-static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+static u64 *guest_test_memory = (u64 *)TEST_GVA;
#define CMD_NONE (0)
#define CMD_SKIP_TEST (1ULL << 1)
@@ -48,7 +48,7 @@ static struct event_cnt {
struct test_desc {
const char *name;
- uint64_t mem_mark_cmd;
+ u64 mem_mark_cmd;
/* Skip the test if any prepare function returns false */
bool (*guest_prepare[PREPARE_FN_NR])(void);
void (*guest_test)(void);
@@ -59,8 +59,8 @@ struct test_desc {
void (*iabt_handler)(struct ex_regs *regs);
void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
void (*fail_vcpu_run_handler)(int ret);
- uint32_t pt_memslot_flags;
- uint32_t data_memslot_flags;
+ u32 pt_memslot_flags;
+ u32 data_memslot_flags;
bool skip;
struct event_cnt expected_events;
};
@@ -70,9 +70,9 @@ struct test_params {
struct test_desc *test_desc;
};
-static inline void flush_tlb_page(uint64_t vaddr)
+static inline void flush_tlb_page(gva_t gva)
{
- uint64_t page = vaddr >> 12;
+ gva_t page = gva >> 12;
dsb(ishst);
asm volatile("tlbi vaae1is, %0" :: "r" (page));
@@ -82,7 +82,7 @@ static inline void flush_tlb_page(uint64_t vaddr)
static void guest_write64(void)
{
- uint64_t val;
+ u64 val;
WRITE_ONCE(*guest_test_memory, TEST_DATA);
val = READ_ONCE(*guest_test_memory);
@@ -92,17 +92,17 @@ static void guest_write64(void)
/* Check the system for atomic instructions. */
static bool guest_check_lse(void)
{
- uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
- uint64_t atomic;
+ u64 isar0 = read_sysreg(id_aa64isar0_el1);
+ u64 atomic;
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0);
return atomic >= 2;
}
static bool guest_check_dc_zva(void)
{
- uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+ u64 dczid = read_sysreg(dczid_el0);
+ u64 dzp = FIELD_GET(DCZID_EL0_DZP, dczid);
return dzp == 0;
}
@@ -110,7 +110,7 @@ static bool guest_check_dc_zva(void)
/* Compare and swap instruction. */
static void guest_cas(void)
{
- uint64_t val;
+ u64 val;
GUEST_ASSERT(guest_check_lse());
asm volatile(".arch_extension lse\n"
@@ -122,7 +122,7 @@ static void guest_cas(void)
static void guest_read64(void)
{
- uint64_t val;
+ u64 val;
val = READ_ONCE(*guest_test_memory);
GUEST_ASSERT_EQ(val, 0);
@@ -131,7 +131,7 @@ static void guest_read64(void)
/* Address translation instruction */
static void guest_at(void)
{
- uint64_t par;
+ u64 par;
asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
isb();
@@ -148,7 +148,7 @@ static void guest_at(void)
*/
static void guest_dc_zva(void)
{
- uint16_t val;
+ u16 val;
asm volatile("dc zva, %0" :: "r" (guest_test_memory));
dsb(ish);
@@ -164,8 +164,8 @@ static void guest_dc_zva(void)
*/
static void guest_ld_preidx(void)
{
- uint64_t val;
- uint64_t addr = TEST_GVA - 8;
+ u64 val;
+ u64 addr = TEST_GVA - 8;
/*
* This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
@@ -179,8 +179,8 @@ static void guest_ld_preidx(void)
static void guest_st_preidx(void)
{
- uint64_t val = TEST_DATA;
- uint64_t addr = TEST_GVA - 8;
+ u64 val = TEST_DATA;
+ u64 addr = TEST_GVA - 8;
asm volatile("str %0, [%1, #8]!"
: "+r" (val), "+r" (addr));
@@ -191,15 +191,15 @@ static void guest_st_preidx(void)
static bool guest_set_ha(void)
{
- uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
- uint64_t hadbs, tcr;
+ u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+ u64 hadbs, tcr;
/* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1);
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
@@ -208,7 +208,7 @@ static bool guest_set_ha(void)
static bool guest_clear_pte_af(void)
{
- *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+ *((u64 *)TEST_PTE_GVA) &= ~PTE_AF;
flush_tlb_page(TEST_GVA);
return true;
@@ -217,7 +217,7 @@ static bool guest_clear_pte_af(void)
static void guest_check_pte_af(void)
{
dsb(ish);
- GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+ GUEST_ASSERT_EQ(*((u64 *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
}
static void guest_check_write_in_dirty_log(void)
@@ -302,26 +302,26 @@ static void no_iabt_handler(struct ex_regs *regs)
static struct uffd_args {
char *copy;
void *hva;
- uint64_t paging_size;
+ u64 paging_size;
} pt_args, data_args;
/* Returns true to continue the test, and false if it should be skipped. */
static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
struct uffd_args *args)
{
- uint64_t addr = msg->arg.pagefault.address;
- uint64_t flags = msg->arg.pagefault.flags;
+ u64 addr = msg->arg.pagefault.address;
+ u64 flags = msg->arg.pagefault.flags;
struct uffdio_copy copy;
int ret;
TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
"The only expected UFFD mode is MISSING");
- TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+ TEST_ASSERT_EQ(addr, (u64)args->hva);
pr_debug("uffd fault: addr=%p write=%d\n",
(void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
- copy.src = (uint64_t)args->copy;
+ copy.src = (u64)args->copy;
copy.dst = addr;
copy.len = args->paging_size;
copy.mode = 0;
@@ -407,7 +407,7 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
void *hva = (void *)region->region.userspace_addr;
- uint64_t paging_size = region->region.memory_size;
+ u64 paging_size = region->region.memory_size;
int ret, fd = region->fd;
if (fd != -1) {
@@ -438,7 +438,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
{
- uint64_t data;
+ u64 data;
memcpy(&data, run->mmio.data, sizeof(data));
pr_debug("addr=%lld len=%d w=%d data=%lx\n",
@@ -449,11 +449,11 @@ static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
static bool check_write_in_dirty_log(struct kvm_vm *vm,
struct userspace_mem_region *region,
- uint64_t host_pg_nr)
+ u64 host_pg_nr)
{
unsigned long *bmap;
bool first_page_dirty;
- uint64_t size = region->region.memory_size;
+ u64 size = region->region.memory_size;
/* getpage_size() is not always equal to vm->page_size */
bmap = bitmap_zalloc(size / getpagesize());
@@ -468,7 +468,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
{
struct userspace_mem_region *data_region, *pt_region;
bool continue_test = true;
- uint64_t pte_gpa, pte_pg;
+ u64 pte_gpa, pte_pg;
data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
@@ -510,7 +510,7 @@ void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
events.fail_vcpu_runs += 1;
}
-typedef uint32_t aarch64_insn_t;
+typedef u32 aarch64_insn_t;
extern aarch64_insn_t __exec_test[2];
noinline void __return_0x77(void)
@@ -525,7 +525,7 @@ noinline void __return_0x77(void)
*/
static void load_exec_code_for_test(struct kvm_vm *vm)
{
- uint64_t *code;
+ u64 *code;
struct userspace_mem_region *region;
void *hva;
@@ -552,7 +552,7 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
static void setup_gva_maps(struct kvm_vm *vm)
{
struct userspace_mem_region *region;
- uint64_t pte_gpa;
+ u64 pte_gpa;
region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
/* Map TEST_GVA first. This will install a new PTE. */
@@ -574,12 +574,12 @@ enum pf_test_memslots {
*/
static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
{
- uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
- uint64_t guest_page_size = vm->page_size;
- uint64_t max_gfn = vm_compute_max_gfn(vm);
+ u64 backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+ u64 guest_page_size = vm->page_size;
+ u64 max_gfn = vm_compute_max_gfn(vm);
/* Enough for 2M of code when using 4K guest pages. */
- uint64_t code_npages = 512;
- uint64_t pt_size, data_size, data_gpa;
+ u64 code_npages = 512;
+ u64 pt_size, data_size, data_gpa;
/*
* This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
index ab491ee9e5f7..e775faf20868 100644
--- a/tools/testing/selftests/kvm/arm64/psci_test.c
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -22,52 +22,50 @@
#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
#define CPU_ON_CONTEXT_ID 0xdeadc0deul
-static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
- uint64_t context_id)
+static u64 psci_cpu_on(u64 target_cpu, u64 entry_addr, u64 context_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
0, 0, 0, 0, &res);
return res.a0;
}
-static uint64_t psci_affinity_info(uint64_t target_affinity,
- uint64_t lowest_affinity_level)
+static u64 psci_affinity_info(u64 target_affinity, u64 lowest_affinity_level)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
0, 0, 0, 0, 0, &res);
return res.a0;
}
-static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+static u64 psci_system_suspend(u64 entry_addr, u64 context_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
0, 0, 0, 0, 0, &res);
return res.a0;
}
-static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
+static u64 psci_system_off2(u64 type, u64 cookie)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
return res.a0;
}
-static uint64_t psci_features(uint32_t func_id)
+static u64 psci_features(u32 func_id)
{
struct arm_smccc_res res;
- smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -89,12 +87,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
vm = vm_create(2);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*source = aarch64_vcpu_add(vm, 0, &init, guest_code);
*target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -109,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
{
- uint64_t obs_pc, obs_x0;
+ u64 obs_pc, obs_x0;
obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
@@ -122,9 +121,9 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
obs_x0, CPU_ON_CONTEXT_ID);
}
-static void guest_test_cpu_on(uint64_t target_cpu)
+static void guest_test_cpu_on(u64 target_cpu)
{
- uint64_t target_state;
+ u64 target_state;
GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
@@ -141,7 +140,7 @@ static void guest_test_cpu_on(uint64_t target_cpu)
static void host_test_cpu_on(void)
{
struct kvm_vcpu *source, *target;
- uint64_t target_mpidr;
+ u64 target_mpidr;
struct kvm_vm *vm;
struct ucall uc;
@@ -165,7 +164,7 @@ static void host_test_cpu_on(void)
static void guest_test_system_suspend(void)
{
- uint64_t ret;
+ u64 ret;
/* assert that SYSTEM_SUSPEND is discoverable */
GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
@@ -199,7 +198,7 @@ static void host_test_system_suspend(void)
static void guest_test_system_off2(void)
{
- uint64_t ret;
+ u64 ret;
/* assert that SYSTEM_OFF2 is discoverable */
GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) &
@@ -237,7 +236,7 @@ static void host_test_system_off2(void)
{
struct kvm_vcpu *source, *target;
struct kvm_mp_state mps;
- uint64_t psci_version = 0;
+ u64 psci_version = 0;
int nr_shutdowns = 0;
struct kvm_run *run;
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c
new file mode 100644
index 000000000000..e96d8982c28b
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
+ * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER.
+ *
+ * After reaching userspace with expected arm_sea info, also test userspace
+ * injecting a synchronous external data abort into the guest.
+ *
+ * This test utilizes EINJ to generate a REAL synchronous external data
+ * abort by consuming a recoverable uncorrectable memory error. Therefore
+ * the device under test must support EINJ in both firmware and host kernel,
+ * including the notrigger feature. Otherwise the test will be skipped.
+ * The under-test platform's APEI should be unable to claim SEA. Otherwise
+ * the test will also be skipped.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define PAGE_PRESENT (1ULL << 63)
+#define PAGE_PHYSICAL 0x007fffffffffffffULL
+#define PAGE_ADDR_MASK (~(0xfffULL))
+
+/* Group ISV and ISS[23:14]. */
+#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
+ (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
+ (ESR_ELx_SF) | (ESR_ELx_AR))
+
+#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
+#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
+#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
+#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
+#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
+#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
+/* Memory Uncorrectable non-fatal. */
+#define ERROR_TYPE_MEMORY_UER 0x10
+/* Memory address and mask valid (param1 and param2). */
+#define MASK_MEMORY_UER 0b10
+
+/* Guest virtual address region = [2G, 3G). */
+#define START_GVA 0x80000000UL
+#define VM_MEM_SIZE 0x40000000UL
+/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */
+#define EINJ_OFFSET 0x01234badUL
+#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
+
+static gpa_t einj_gpa;
+static void *einj_hva;
+static u64 einj_hpa;
+static bool far_invalid;
+
+static u64 translate_hva_to_hpa(unsigned long hva)
+{
+ u64 pinfo;
+ s64 offset = hva / getpagesize() * sizeof(pinfo);
+ int fd;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
+ if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+ close(fd);
+ ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
+ }
+
+ close(fd);
+
+ if ((pinfo & PAGE_PRESENT) == 0)
+ ksft_exit_fail_perror("Page not present");
+
+ return ((pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT) +
+ (hva & (getpagesize() - 1));
+}
+
+static void write_einj_entry(const char *einj_path, u64 val)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ sprintf(cmd, "echo %#lx > %s", val, einj_path);
+ cmdfile = popen(cmd, "r");
+
+ if (pclose(cmdfile) == 0)
+ ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
+ else
+ ksft_exit_fail_perror("Failed to write EINJ entry");
+}
+
+static void inject_uer(u64 hpa)
+{
+ if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
+ ksft_test_result_skip("EINJ table no available in firmware");
+
+ if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
+ ksft_test_result_skip("EINJ module probably not loaded?");
+
+ write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
+ write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
+ write_einj_entry(EINJ_ADDR, hpa);
+ write_einj_entry(EINJ_MASK, ~0x0UL);
+ write_einj_entry(EINJ_NOTRIGGER, 1);
+ write_einj_entry(EINJ_DOIT, 1);
+}
+
+/*
+ * When host APEI successfully claims the SEA caused by guest_code, kernel
+ * will send SIGBUS signal with BUS_MCEERR_AR to test thread.
+ *
+ * We set up this SIGBUS handler to skip the test for that case.
+ */
+static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
+{
+ ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
+ ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
+ si->si_signo, si->si_errno, si->si_code, si->si_addr);
+ if (si->si_code == BUS_MCEERR_AR)
+ ksft_test_result_skip("SEA is claimed by host APEI\n");
+ else
+ ksft_test_result_fail("Exit with signal unhandled\n");
+
+ exit(0);
+}
+
+static void setup_sigbus_handler(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&act.sa_mask);
+ act.sa_sigaction = sigbus_signal_handler;
+ act.sa_flags = SA_SIGINFO;
+ TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
+ "Failed to setup SIGBUS handler");
+}
+
+static void guest_code(void)
+{
+ u64 guest_data;
+
+ /* Consumes error will cause a SEA. */
+ guest_data = *(u64 *)EINJ_GVA;
+
+ GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
+ EINJ_GVA, guest_data);
+}
+
+static void expect_sea_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+ u64 far = read_sysreg(far_el1);
+ bool expect_far_invalid = far_invalid;
+
+ GUEST_PRINTF("Handling Guest SEA\n");
+ GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
+
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+
+ if (expect_far_invalid) {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
+ GUEST_PRINTF("Guest observed garbage value in FAR\n");
+ } else {
+ GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
+ GUEST_ASSERT_EQ(far, EINJ_GVA);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events = {};
+
+ events.exception.ext_dabt_pending = true;
+ vcpu_events_set(vcpu, &events);
+}
+
+static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ bool guest_done = false;
+ struct kvm_run *run = vcpu->run;
+ u64 esr;
+
+ /* Resume the vCPU after error injection to consume the error. */
+ vcpu_run(vcpu);
+
+ ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
+ exit_reason_str(run->exit_reason));
+ ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
+ run->arm_sea.esr, run->arm_sea.flags);
+ ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
+ run->arm_sea.gva, run->arm_sea.gpa);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
+
+ esr = run->arm_sea.esr;
+ TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
+ TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
+ TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
+ TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
+ TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
+
+ if (!(esr & ESR_ELx_FnV)) {
+ ksft_print_msg("Expect gva to match given FnV bit is 0\n");
+ TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
+ }
+
+ if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
+ ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
+ TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
+ }
+
+ far_invalid = esr & ESR_ELx_FnV;
+
+ /* Inject a SEA into guest and expect handled in SEA handler. */
+ vcpu_inject_sea(vcpu);
+
+ /* Expect the guest to reach GUEST_DONE gracefully. */
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ ksft_print_msg("From guest: %s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ ksft_print_msg("Guest done gracefully!\n");
+ guest_done = 1;
+ break;
+ case UCALL_ABORT:
+ ksft_print_msg("Guest aborted!\n");
+ guest_done = 1;
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
+ }
+ } while (!guest_done);
+}
+
+static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
+{
+ size_t backing_page_size;
+ size_t guest_page_size;
+ size_t alignment;
+ u64 num_guest_pages;
+ gpa_t start_gpa;
+ enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
+ struct kvm_vm *vm;
+
+ backing_page_size = get_backing_src_pagesz(src_type);
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ alignment = max(backing_page_size, guest_page_size);
+ num_guest_pages = VM_MEM_SIZE / guest_page_size;
+
+ vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ vm_install_sync_handler(vm,
+ /*vector=*/VECTOR_SYNC_CURRENT,
+ /*ec=*/ESR_ELx_EC_DABT_CUR,
+ /*handler=*/expect_sea_handler);
+
+ start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
+ start_gpa = align_down(start_gpa, alignment);
+
+ vm_userspace_mem_region_add(
+ /*vm=*/vm,
+ /*src_type=*/src_type,
+ /*gpa=*/start_gpa,
+ /*slot=*/1,
+ /*npages=*/num_guest_pages,
+ /*flags=*/0);
+
+ virt_map(vm, START_GVA, start_gpa, num_guest_pages);
+
+ ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
+ num_guest_pages, START_GVA, start_gpa);
+ return vm;
+}
+
+static void vm_inject_memory_uer(struct kvm_vm *vm)
+{
+ u64 guest_data;
+
+ einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
+ einj_hva = addr_gva2hva(vm, EINJ_GVA);
+
+ /* Populate certain data before injecting UER. */
+ *(u64 *)einj_hva = 0xBAADCAFE;
+ guest_data = *(u64 *)einj_hva;
+ ksft_print_msg("Before EINJect: data=%#lx\n",
+ guest_data);
+
+ einj_hpa = translate_hva_to_hpa((unsigned long)einj_hva);
+
+ ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
+ EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
+
+ inject_uer(einj_hpa);
+ ksft_print_msg("Memory UER EINJected\n");
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
+
+ setup_sigbus_handler();
+
+ vm = vm_create_with_sea_handler(&vcpu);
+ vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
+ vm_inject_memory_uer(vm);
+ run_vm(vm, vcpu);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 217541fe6536..7429a1055df5 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -30,21 +30,24 @@ struct reg_ftr_bits {
char *name;
bool sign;
enum ftr_type type;
- uint8_t shift;
- uint64_t mask;
+ u8 shift;
+ u64 mask;
/*
* For FTR_EXACT, safe_val is used as the exact safe value.
* For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
*/
- int64_t safe_val;
+ s64 safe_val;
+
+ /* Allowed to be changed by the host after run */
+ bool mutable;
};
struct test_feature_reg {
- uint32_t reg;
+ u32 reg;
const struct reg_ftr_bits *ftr_bits;
};
-#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL, MUT) \
{ \
.name = #NAME, \
.sign = SIGNED, \
@@ -52,15 +55,20 @@ struct test_feature_reg {
.shift = SHIFT, \
.mask = MASK, \
.safe_val = SAFE_VAL, \
+ .mutable = MUT, \
}
#define REG_FTR_BITS(type, reg, field, safe_val) \
__REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
+ reg##_##field##_MASK, safe_val, false)
+
+#define REG_FTR_BITS_MUTABLE(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val, true)
#define S_REG_FTR_BITS(type, reg, field, safe_val) \
__REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
- reg##_##field##_MASK, safe_val)
+ reg##_##field##_MASK, safe_val, false)
#define REG_FTR_END \
{ \
@@ -91,7 +99,6 @@ static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
@@ -123,20 +130,30 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSUI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
- REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ /* GICv3 support will be forced at run time if available */
+ REG_FTR_BITS_MUTABLE(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
REG_FTR_END,
};
static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0),
@@ -146,6 +163,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
@@ -159,7 +179,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0),
REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
@@ -182,6 +204,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
REG_FTR_END,
};
+static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0),
+ REG_FTR_END,
+};
+
static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
@@ -207,11 +237,13 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1),
TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1),
TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
};
@@ -224,20 +256,30 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_MPIDR_EL1);
+ GUEST_REG_SYNC(SYS_CLIDR_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
+ GUEST_REG_SYNC(SYS_MIDR_EL1);
+ GUEST_REG_SYNC(SYS_REVIDR_EL1);
+ GUEST_REG_SYNC(SYS_AIDR_EL1);
GUEST_DONE();
}
/* Return a safe value to a given ftr_bits an ftr value */
-uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+u64 get_safe_value(const struct reg_ftr_bits *ftr_bits, u64 ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ u64 ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
@@ -287,14 +329,16 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
}
/* Return an invalid value to a given ftr_bits an ftr value */
-uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+u64 get_invalid_value(const struct reg_ftr_bits *ftr_bits, u64 ftr)
{
- uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+ u64 ftr_max = ftr_bits->mask >> ftr_bits->shift;
+
+ TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features");
if (ftr_bits->sign == FTR_UNSIGNED) {
switch (ftr_bits->type) {
case FTR_EXACT:
- ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1);
break;
case FTR_LOWER_SAFE:
ftr++;
@@ -314,7 +358,7 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
} else if (ftr != ftr_max) {
switch (ftr_bits->type) {
case FTR_EXACT:
- ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1);
break;
case FTR_LOWER_SAFE:
ftr++;
@@ -338,12 +382,12 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
return ftr;
}
-static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
- const struct reg_ftr_bits *ftr_bits)
+static u64 test_reg_set_success(struct kvm_vcpu *vcpu, u64 reg,
+ const struct reg_ftr_bits *ftr_bits)
{
- uint8_t shift = ftr_bits->shift;
- uint64_t mask = ftr_bits->mask;
- uint64_t val, new_val, ftr;
+ u8 shift = ftr_bits->shift;
+ u64 mask = ftr_bits->mask;
+ u64 val, new_val, ftr;
val = vcpu_get_reg(vcpu, reg);
ftr = (val & mask) >> shift;
@@ -361,12 +405,12 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
return new_val;
}
-static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, u64 reg,
const struct reg_ftr_bits *ftr_bits)
{
- uint8_t shift = ftr_bits->shift;
- uint64_t mask = ftr_bits->mask;
- uint64_t val, old_val, ftr;
+ u8 shift = ftr_bits->shift;
+ u64 mask = ftr_bits->mask;
+ u64 val, old_val, ftr;
int r;
val = vcpu_get_reg(vcpu, reg);
@@ -387,7 +431,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
TEST_ASSERT_EQ(val, old_val);
}
-static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+static u64 test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
#define encoding_to_range_idx(encoding) \
KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
@@ -397,7 +441,7 @@ static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
{
- uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
struct reg_mask_range range = {
.addr = (__u64)masks,
};
@@ -414,8 +458,8 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
- uint32_t reg_id = test_regs[i].reg;
- uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+ u32 reg_id = test_regs[i].reg;
+ u64 reg = KVM_ARM64_SYS_REG(reg_id);
int idx;
/* Get the index to masks array for the idreg */
@@ -445,11 +489,11 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
#define MPAM_IDREG_TEST 6
static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
{
- uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
struct reg_mask_range range = {
.addr = (__u64)masks,
};
- uint64_t val;
+ u64 val;
int idx, err;
/*
@@ -537,12 +581,101 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
}
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ u64 val;
+ u64 mte;
+ u64 mte_frac;
+ int idx, err;
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
+ if (!mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
+static u64 reset_mutable_bits(u32 id, u64 val)
+{
+ struct test_feature_reg *reg = NULL;
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ if (test_regs[i].reg == id) {
+ reg = &test_regs[i];
+ break;
+ }
+ }
+
+ if (!reg)
+ return val;
+
+ for (const struct reg_ftr_bits *bits = reg->ftr_bits; bits->type != FTR_END; bits++) {
+ if (bits->mutable) {
+ val &= ~bits->mask;
+ val |= bits->safe_val << bits->shift;
+ }
+ }
+
+ return val;
+}
+
static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
struct ucall uc;
while (!done) {
+ u64 val;
+
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
@@ -550,9 +683,11 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_SYNC:
+ val = test_reg_vals[encoding_to_range_idx(uc.args[2])];
+ val = reset_mutable_bits(uc.args[2], val);
+
/* Make sure the written values are seen by guest */
- TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
- uc.args[3]);
+ TEST_ASSERT_EQ(val, reset_mutable_bits(uc.args[2], uc.args[3]));
break;
case UCALL_DONE:
done = true;
@@ -572,13 +707,13 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
static void test_clidr(struct kvm_vcpu *vcpu)
{
- uint64_t clidr;
+ u64 clidr;
int level;
clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
/* find the first empty level in the cache hierarchy */
- for (level = 1; level < 7; level++) {
+ for (level = 1; level <= 7; level++) {
if (!CLIDR_CTYPE(clidr, level))
break;
}
@@ -609,28 +744,42 @@ static void test_ctr(struct kvm_vcpu *vcpu)
test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
}
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
{
u64 val;
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+ test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
test_clidr(vcpu);
test_ctr(vcpu);
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
- val++;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+ test_id_reg(vcpu, SYS_MPIDR_EL1);
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_id_reg(vcpu, SYS_MIDR_EL1);
+ test_id_reg(vcpu, SYS_REVIDR_EL1);
+ test_id_reg(vcpu, SYS_AIDR_EL1);
- test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
ksft_test_result_pass("%s\n", __func__);
}
-static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, u32 encoding)
{
size_t idx = encoding_to_range_idx(encoding);
- uint64_t observed;
+ u64 observed;
observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
- TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+ TEST_ASSERT_EQ(reset_mutable_bits(encoding, test_reg_vals[idx]),
+ reset_mutable_bits(encoding, observed));
}
static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
@@ -647,6 +796,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+ test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
ksft_test_result_pass("%s\n", __func__);
}
@@ -656,33 +808,38 @@ int main(void)
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
bool aarch64_only;
- uint64_t val, el0;
- int test_cnt;
+ u64 val, el0;
+ int test_cnt, i, j;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
+
+ test_wants_mte();
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val);
aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
ksft_print_header();
- test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
- MPAM_IDREG_TEST;
+ test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
+ for (i = 0; i < ARRAY_SIZE(test_regs); i++)
+ for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++)
+ test_cnt++;
ksft_set_plan(test_cnt);
test_vm_ftr_id_regs(vcpu, aarch64_only);
test_vcpu_ftr_id_regs(vcpu);
+ test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
index 2d189f3da228..21e41880261b 100644
--- a/tools/testing/selftests/kvm/arm64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
@@ -22,10 +22,22 @@ enum smccc_conduit {
SMC_INSN,
};
+static bool test_runs_at_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ struct kvm_vcpu_init init;
+
+ kvm_get_default_vcpu_target(vm, &init);
+ kvm_vm_free(vm);
+
+ return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
#define for_each_conduit(conduit) \
- for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+ for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \
+ conduit <= SMC_INSN; conduit++)
-static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+static void guest_main(u32 func_id, enum smccc_conduit conduit)
{
struct arm_smccc_res res;
@@ -37,7 +49,7 @@ static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
GUEST_SYNC(res.a0);
}
-static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+static int __set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions,
enum kvm_smccc_filter_action action)
{
struct kvm_smccc_filter filter = {
@@ -50,7 +62,7 @@ static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_fun
KVM_ARM_VM_SMCCC_FILTER, &filter);
}
-static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+static void set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions,
enum kvm_smccc_filter_action action)
{
int ret = __set_smccc_filter(vm, start, nr_functions, action);
@@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
struct kvm_vm *vm;
vm = vm_create(1);
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vm, &init);
/*
* Enable in-kernel emulation of PSCI to ensure that calls are denied
@@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@@ -99,7 +112,7 @@ static void test_filter_reserved_range(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm = setup_vm(&vcpu);
- uint32_t smc64_fn;
+ u32 smc64_fn;
int r;
r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
@@ -204,7 +217,7 @@ static void test_filter_denied(void)
}
}
-static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, u32 func_id,
enum smccc_conduit conduit)
{
struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..47e34b43afb2 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
@@ -9,16 +9,17 @@
#include <asm/kvm.h>
#include <asm/kvm_para.h>
+#include <arm64/gic_v3.h>
+
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "vgic.h"
+#include "gic_v3.h"
#define NR_VCPUS 4
-#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
-
-#define GICR_TYPER 0x8
+#define REG_OFFSET(vcpu, offset) (((u64)vcpu << 32) | offset)
#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
@@ -26,10 +27,10 @@
struct vm_gic {
struct kvm_vm *vm;
int gic_fd;
- uint32_t gic_dev_type;
+ u32 gic_dev_type;
};
-static uint64_t max_phys_size;
+static u64 max_phys_size;
/*
* Helpers to access a redistributor register and verify the ioctl() failed or
@@ -38,17 +39,17 @@ static uint64_t max_phys_size;
static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
int want, const char *msg)
{
- uint32_t ignored_val;
+ u32 ignored_val;
int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
REG_OFFSET(vcpu, offset), &ignored_val);
TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
}
-static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, u32 want,
const char *msg)
{
- uint32_t val;
+ u32 val;
kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
REG_OFFSET(vcpu, offset), &val);
@@ -70,8 +71,8 @@ static int run_vcpu(struct kvm_vcpu *vcpu)
return __vcpu_run(vcpu) ? -errno : 0;
}
-static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
- uint32_t nr_vcpus,
+static struct vm_gic vm_gic_create_with_vcpus(u32 gic_dev_type,
+ u32 nr_vcpus,
struct kvm_vcpu *vcpus[])
{
struct vm_gic v;
@@ -83,7 +84,7 @@ static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
return v;
}
-static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type)
+static struct vm_gic vm_gic_create_barebones(u32 gic_dev_type)
{
struct vm_gic v;
@@ -102,9 +103,9 @@ static void vm_gic_destroy(struct vm_gic *v)
}
struct vgic_region_attr {
- uint64_t attr;
- uint64_t size;
- uint64_t alignment;
+ u64 attr;
+ u64 size;
+ u64 alignment;
};
struct vgic_region_attr gic_v3_dist_region = {
@@ -142,7 +143,7 @@ struct vgic_region_attr gic_v2_cpu_region = {
static void subtest_dist_rdist(struct vm_gic *v)
{
int ret;
- uint64_t addr;
+ u64 addr;
struct vgic_region_attr rdist; /* CPU interface in GICv2*/
struct vgic_region_attr dist;
@@ -222,7 +223,7 @@ static void subtest_dist_rdist(struct vm_gic *v)
/* Test the new REDIST region API */
static void subtest_v3_redist_regions(struct vm_gic *v)
{
- uint64_t addr, expected_addr;
+ u64 addr, expected_addr;
int ret;
ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
@@ -331,7 +332,7 @@ static void subtest_v3_redist_regions(struct vm_gic *v)
* VGIC KVM device is created and initialized before the secondary CPUs
* get created
*/
-static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+static void test_vgic_then_vcpus(u32 gic_dev_type)
{
struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
@@ -352,7 +353,7 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type)
}
/* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+static void test_vcpus_then_vgic(u32 gic_dev_type)
{
struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
@@ -407,7 +408,7 @@ static void test_v3_new_redist_regions(void)
struct kvm_vcpu *vcpus[NR_VCPUS];
void *dummy = NULL;
struct vm_gic v;
- uint64_t addr;
+ u64 addr;
int ret;
v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
@@ -459,7 +460,7 @@ static void test_v3_new_redist_regions(void)
static void test_v3_typer_accesses(void)
{
struct vm_gic v;
- uint64_t addr;
+ u64 addr;
int ret, i;
v.vm = vm_create(NR_VCPUS);
@@ -517,7 +518,7 @@ static void test_v3_typer_accesses(void)
}
static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
- uint32_t vcpuids[])
+ u32 vcpuids[])
{
struct vm_gic v;
int i;
@@ -543,9 +544,9 @@ static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
*/
static void test_v3_last_bit_redist_regions(void)
{
- uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
- uint64_t addr;
+ u64 addr;
v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
@@ -577,9 +578,9 @@ static void test_v3_last_bit_redist_regions(void)
/* Test last bit with legacy region */
static void test_v3_last_bit_single_rdist(void)
{
- uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
- uint64_t addr;
+ u64 addr;
v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
@@ -605,7 +606,7 @@ static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
int ret, i;
- uint64_t addr;
+ u64 addr;
v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
@@ -637,7 +638,7 @@ static void test_v3_its_region(void)
{
struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
- uint64_t addr;
+ u64 addr;
int its_fd, ret;
v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
@@ -675,14 +676,52 @@ static void test_v3_its_region(void)
vm_gic_destroy(&v);
}
+static void test_v3_nassgicap(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ bool has_nassgicap;
+ struct vm_gic vm;
+ u32 typer2;
+ int ret;
+
+ vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap;
+
+ typer2 |= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ if (has_nassgicap)
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret));
+ else
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Enabled nASSGIcap even though it's unavailable");
+
+ typer2 &= ~GICD_TYPER2_nASSGIcap;
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+
+ kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ typer2 ^= GICD_TYPER2_nASSGIcap;
+ ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+ GICD_TYPER2, &typer2);
+ TEST_ASSERT(ret && errno == EBUSY,
+ "Changed nASSGIcap after initializing the VGIC");
+
+ vm_gic_destroy(&vm);
+}
+
/*
* Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
*/
-int test_kvm_device(uint32_t gic_dev_type)
+int test_kvm_device(u32 gic_dev_type)
{
struct kvm_vcpu *vcpus[NR_VCPUS];
struct vm_gic v;
- uint32_t other;
+ u32 other;
int ret;
v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
@@ -715,7 +754,221 @@ int test_kvm_device(uint32_t gic_dev_type)
return 0;
}
-void run_tests(uint32_t gic_dev_type)
+struct sr_def {
+ const char *name;
+ u32 encoding;
+};
+
+#define PACK_SR(r) \
+ ((sys_reg_Op0(r) << 14) | \
+ (sys_reg_Op1(r) << 11) | \
+ (sys_reg_CRn(r) << 7) | \
+ (sys_reg_CRm(r) << 3) | \
+ (sys_reg_Op2(r)))
+
+#define SR(r) \
+ { \
+ .name = #r, \
+ .encoding = r, \
+ }
+
+static const struct sr_def sysregs_el1[] = {
+ SR(SYS_ICC_PMR_EL1),
+ SR(SYS_ICC_BPR0_EL1),
+ SR(SYS_ICC_AP0R0_EL1),
+ SR(SYS_ICC_AP0R1_EL1),
+ SR(SYS_ICC_AP0R2_EL1),
+ SR(SYS_ICC_AP0R3_EL1),
+ SR(SYS_ICC_AP1R0_EL1),
+ SR(SYS_ICC_AP1R1_EL1),
+ SR(SYS_ICC_AP1R2_EL1),
+ SR(SYS_ICC_AP1R3_EL1),
+ SR(SYS_ICC_BPR1_EL1),
+ SR(SYS_ICC_CTLR_EL1),
+ SR(SYS_ICC_SRE_EL1),
+ SR(SYS_ICC_IGRPEN0_EL1),
+ SR(SYS_ICC_IGRPEN1_EL1),
+};
+
+static const struct sr_def sysregs_el2[] = {
+ SR(SYS_ICH_AP0R0_EL2),
+ SR(SYS_ICH_AP0R1_EL2),
+ SR(SYS_ICH_AP0R2_EL2),
+ SR(SYS_ICH_AP0R3_EL2),
+ SR(SYS_ICH_AP1R0_EL2),
+ SR(SYS_ICH_AP1R1_EL2),
+ SR(SYS_ICH_AP1R2_EL2),
+ SR(SYS_ICH_AP1R3_EL2),
+ SR(SYS_ICH_HCR_EL2),
+ SR(SYS_ICC_SRE_EL2),
+ SR(SYS_ICH_VTR_EL2),
+ SR(SYS_ICH_VMCR_EL2),
+ SR(SYS_ICH_LR0_EL2),
+ SR(SYS_ICH_LR1_EL2),
+ SR(SYS_ICH_LR2_EL2),
+ SR(SYS_ICH_LR3_EL2),
+ SR(SYS_ICH_LR4_EL2),
+ SR(SYS_ICH_LR5_EL2),
+ SR(SYS_ICH_LR6_EL2),
+ SR(SYS_ICH_LR7_EL2),
+ SR(SYS_ICH_LR8_EL2),
+ SR(SYS_ICH_LR9_EL2),
+ SR(SYS_ICH_LR10_EL2),
+ SR(SYS_ICH_LR11_EL2),
+ SR(SYS_ICH_LR12_EL2),
+ SR(SYS_ICH_LR13_EL2),
+ SR(SYS_ICH_LR14_EL2),
+ SR(SYS_ICH_LR15_EL2),
+};
+
+static void test_sysreg_array(int gic, const struct sr_def *sr, int nr,
+ int (*check)(int, const struct sr_def *, const char *))
+{
+ for (int i = 0; i < nr; i++) {
+ u64 val;
+ u64 attr;
+ int ret;
+
+ /* Assume MPIDR_EL1.Aff*=0 */
+ attr = PACK_SR(sr[i].encoding);
+
+ /*
+ * The API is braindead. A register can be advertised as
+ * available, and yet not be readable or writable.
+ * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and
+ * ICH_APnR{1,2,3}_EL2 do follow suit for consistency.
+ *
+ * On the bright side, no known HW is implementing more than
+ * 5 bits of priority, so we're safe. Sort of...
+ */
+ ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr);
+ TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name);
+
+ /* Check that we can write back what we read */
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name);
+ ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ attr, &val);
+ TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name);
+ }
+}
+
+static u8 get_ctlr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICC_CTLR_EL1), &val);
+ TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable");
+
+ pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICC_AP0R1_EL1:
+ case SYS_ICC_AP1R1_EL1:
+ if (get_ctlr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICC_AP0R2_EL1:
+ case SYS_ICC_AP0R3_EL1:
+ case SYS_ICC_AP1R2_EL1:
+ case SYS_ICC_AP1R3_EL1:
+ if (get_ctlr_pribits(gic) == 7)
+ return 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static u8 get_vtr_pribits(int gic)
+{
+ int ret;
+ u64 val;
+ u8 pri;
+
+ ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+ PACK_SR(SYS_ICH_VTR_EL2), &val);
+ TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable");
+
+ pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1;
+ TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri);
+
+ return pri;
+}
+
+static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what)
+{
+ switch (sr->encoding) {
+ case SYS_ICH_AP0R1_EL2:
+ case SYS_ICH_AP1R1_EL2:
+ if (get_vtr_pribits(gic) >= 6)
+ return -EINVAL;
+ break;
+ case SYS_ICH_AP0R2_EL2:
+ case SYS_ICH_AP0R3_EL2:
+ case SYS_ICH_AP1R2_EL2:
+ case SYS_ICH_AP1R3_EL2:
+ if (get_vtr_pribits(gic) == 7)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pr_info("SKIP %s for %s\n", sr->name, what);
+ return 0;
+}
+
+static void test_v3_sysregs(void)
+{
+ struct kvm_vcpu_init init = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u32 feat = 0;
+ int gic;
+
+ if (kvm_check_cap(KVM_CAP_ARM_EL2))
+ feat |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ vm = vm_create(1);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= feat;
+
+ vcpu = aarch64_vcpu_add(vm, 0, &init, NULL);
+ TEST_ASSERT(vcpu, "Can't create a vcpu?");
+
+ gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ TEST_ASSERT(gic >= 0, "No GIC???");
+
+ kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs);
+ if (feat)
+ test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs);
+ else
+ pr_info("SKIP EL2 registers, not available\n");
+
+ close(gic);
+ kvm_vm_free(vm);
+}
+
+void run_tests(u32 gic_dev_type)
{
test_vcpus_then_vgic(gic_dev_type);
test_vgic_then_vcpus(gic_dev_type);
@@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type)
test_v3_last_bit_single_rdist();
test_v3_redist_ipa_range_check_at_vcpu_run();
test_v3_its_region();
+ test_v3_sysregs();
+ test_v3_nassgicap();
}
}
@@ -739,6 +994,8 @@ int main(int ac, char **av)
int pa_bits;
int cnt_impl = 0;
+ test_disable_default_vgic();
+
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;
diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..5e231998617e 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
@@ -24,11 +24,12 @@
* function.
*/
struct test_args {
- uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ u32 nr_irqs; /* number of KVM supported IRQs. */
bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+ u32 shared_data;
};
/*
@@ -63,15 +64,15 @@ typedef enum {
struct kvm_inject_args {
kvm_inject_cmd cmd;
- uint32_t first_intid;
- uint32_t num;
+ u32 first_intid;
+ u32 num;
int level;
bool expect_failure;
};
/* Used on the guest side to perform the hypercall. */
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
- uint32_t num, int level, bool expect_failure);
+static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid,
+ u32 num, int level, bool expect_failure);
/* Used on the host side to get the hypercall info. */
static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
@@ -132,8 +133,8 @@ static struct kvm_inject_desc set_active_fns[] = {
for_each_supported_inject_fn((args), (t), (f))
/* Shared between the guest main thread and the IRQ handlers. */
-volatile uint64_t irq_handled;
-volatile uint32_t irqnr_received[MAX_SPI + 1];
+volatile u64 irq_handled;
+volatile u32 irqnr_received[MAX_SPI + 1];
static void reset_stats(void)
{
@@ -144,25 +145,25 @@ static void reset_stats(void)
irqnr_received[i] = 0;
}
-static uint64_t gic_read_ap1r0(void)
+static u64 gic_read_ap1r0(void)
{
- uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
+ u64 reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
dsb(sy);
return reg;
}
-static void gic_write_ap1r0(uint64_t val)
+static void gic_write_ap1r0(u64 val)
{
write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
isb();
}
-static void guest_set_irq_line(uint32_t intid, uint32_t level);
+static void guest_set_irq_line(u32 intid, u32 level);
static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
{
- uint32_t intid = gic_get_and_ack_irq();
+ u32 intid = gic_get_and_ack_irq();
if (intid == IAR_SPURIOUS)
return;
@@ -188,8 +189,8 @@ static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
GUEST_ASSERT(!gic_irq_get_pending(intid));
}
-static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
- uint32_t num, int level, bool expect_failure)
+static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid,
+ u32 num, int level, bool expect_failure)
{
struct kvm_inject_args args = {
.cmd = cmd,
@@ -203,9 +204,9 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
#define GUEST_ASSERT_IAR_EMPTY() \
do { \
- uint32_t _intid; \
+ u32 _intid; \
_intid = gic_get_and_ack_irq(); \
- GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+ GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -236,13 +237,13 @@ static void reset_priorities(struct test_args *args)
gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
}
-static void guest_set_irq_line(uint32_t intid, uint32_t level)
+static void guest_set_irq_line(u32 intid, u32 level)
{
kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
}
static void test_inject_fail(struct test_args *args,
- uint32_t intid, kvm_inject_cmd cmd)
+ u32 intid, kvm_inject_cmd cmd)
{
reset_stats();
@@ -254,10 +255,10 @@ static void test_inject_fail(struct test_args *args,
}
static void guest_inject(struct test_args *args,
- uint32_t first_intid, uint32_t num,
- kvm_inject_cmd cmd)
+ u32 first_intid, u32 num,
+ kvm_inject_cmd cmd)
{
- uint32_t i;
+ u32 i;
reset_stats();
@@ -291,10 +292,10 @@ static void guest_inject(struct test_args *args,
* deactivated yet.
*/
static void guest_restore_active(struct test_args *args,
- uint32_t first_intid, uint32_t num,
- kvm_inject_cmd cmd)
+ u32 first_intid, u32 num,
+ kvm_inject_cmd cmd)
{
- uint32_t prio, intid, ap1r;
+ u32 prio, intid, ap1r;
int i;
/*
@@ -341,9 +342,9 @@ static void guest_restore_active(struct test_args *args,
* This function should only be used in test_inject_preemption (with IRQs
* masked).
*/
-static uint32_t wait_for_and_activate_irq(void)
+static u32 wait_for_and_activate_irq(void)
{
- uint32_t intid;
+ u32 intid;
do {
asm volatile("wfi" : : : "memory");
@@ -359,10 +360,11 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
- uint32_t first_intid, int num,
- kvm_inject_cmd cmd)
+ u32 first_intid, int num,
+ const unsigned long *exclude,
+ kvm_inject_cmd cmd)
{
- uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ u32 intid, prio, step = KVM_PRIO_STEPS;
int i;
/* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
@@ -377,8 +379,12 @@ static void test_inject_preemption(struct test_args *args,
local_irq_disable();
for (i = 0; i < num; i++) {
- uint32_t tmp;
+ u32 tmp;
intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
+
+ if (exclude && test_bit(intid - first_intid, exclude))
+ continue;
+
gic_set_eoi(intid);
- if (args->eoi_split)
- gic_set_dir(intid);
+ }
+
+ if (args->eoi_split) {
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+
+ if (exclude && test_bit(i, exclude))
+ continue;
+
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
}
local_irq_enable();
- for (i = 0; i < num; i++)
+ for (i = 0; i < num; i++) {
+ if (exclude && test_bit(i, exclude))
+ continue;
+
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ }
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -407,7 +431,7 @@ static void test_inject_preemption(struct test_args *args,
static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
{
- uint32_t nr_irqs = args->nr_irqs;
+ u32 nr_irqs = args->nr_irqs;
if (f->sgi) {
guest_inject(args, MIN_SGI, 1, f->cmd);
@@ -427,7 +451,7 @@ static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
static void test_injection_failure(struct test_args *args,
struct kvm_inject_desc *f)
{
- uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ u32 bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
int i;
for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
@@ -436,49 +460,48 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
- /*
- * Test up to 4 levels of preemption. The reason is that KVM doesn't
- * currently implement the ability to have more than the number-of-LRs
- * number of concurrently active IRQs. The number of LRs implemented is
- * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
- */
+ /* Timer PPIs cannot be injected from userspace */
+ static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
+ BIT(30 - MIN_PPI) |
+ BIT(28 - MIN_PPI) |
+ BIT(26 - MIN_PPI));
+
if (f->sgi)
- test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
- test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
- test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+ test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
- /* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
- guest_restore_active(args, MIN_SGI, 4, f->cmd);
+ guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
- guest_restore_active(args, MIN_PPI, 4, f->cmd);
+ guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
- guest_restore_active(args, MIN_SPI, 4, f->cmd);
+ guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
{
- uint32_t i, nr_irqs = args->nr_irqs;
+ u32 i, nr_irqs = args->nr_irqs;
bool level_sensitive = args->level_sensitive;
struct kvm_inject_desc *f, *inject_fns;
gic_init(GIC_V3, 1);
- for (i = 0; i < nr_irqs; i++)
- gic_irq_enable(i);
-
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -506,8 +529,8 @@ static void guest_code(struct test_args *args)
GUEST_DONE();
}
-static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
- struct test_args *test_args, bool expect_failure)
+static void kvm_irq_line_check(struct kvm_vm *vm, u32 intid, int level,
+ struct test_args *test_args, bool expect_failure)
{
int ret;
@@ -525,8 +548,8 @@ static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
}
}
-void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
- bool expect_failure)
+void kvm_irq_set_level_info_check(int gic_fd, u32 intid, int level,
+ bool expect_failure)
{
if (!expect_failure) {
kvm_irq_set_level_info(gic_fd, intid, level);
@@ -550,17 +573,18 @@ void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
}
static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
- uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
- bool expect_failure)
+ u32 intid, u32 num,
+ u32 kvm_max_routes,
+ bool expect_failure)
{
struct kvm_irq_routing *routing;
int ret;
- uint64_t i;
+ u64 i;
assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
routing = kvm_gsi_routing_create();
- for (i = intid; i < (uint64_t)intid + num; i++)
+ for (i = intid; i < (u64)intid + num; i++)
kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
if (!expect_failure) {
@@ -568,7 +592,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
} else {
ret = _kvm_gsi_routing_write(vm, routing);
/* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
- if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+ if (((u64)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
TEST_ASSERT(ret != 0 && errno == EINVAL,
"Bad intid %u did not cause KVM_SET_GSI_ROUTING "
"error: rc: %i errno: %i", intid, ret, errno);
@@ -579,7 +603,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
}
}
-static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+static void kvm_irq_write_ispendr_check(int gic_fd, u32 intid,
struct kvm_vcpu *vcpu,
bool expect_failure)
{
@@ -595,13 +619,13 @@ static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
}
static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
- uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
- bool expect_failure)
+ u32 intid, u32 num, u32 kvm_max_routes,
+ bool expect_failure)
{
int fd[MAX_SPI];
- uint64_t val;
+ u64 val;
int ret, f;
- uint64_t i;
+ u64 i;
/*
* There is no way to try injecting an SGI or PPI as the interface
@@ -620,35 +644,29 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
* that no actual interrupt was injected for those cases.
*/
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- fd[f] = eventfd(0, 0);
- TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
- }
+ for (f = 0, i = intid; i < (u64)intid + num; i++, f++)
+ fd[f] = kvm_new_eventfd();
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
- struct kvm_irqfd irqfd = {
- .fd = fd[f],
- .gsi = i - MIN_SPI,
- };
- assert(i <= (uint64_t)UINT_MAX);
- vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ for (f = 0, i = intid; i < (u64)intid + num; i++, f++) {
+ assert(i <= (u64)UINT_MAX);
+ kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
}
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ for (f = 0, i = intid; i < (u64)intid + num; i++, f++) {
val = 1;
- ret = write(fd[f], &val, sizeof(uint64_t));
- TEST_ASSERT(ret == sizeof(uint64_t),
+ ret = write(fd[f], &val, sizeof(u64));
+ TEST_ASSERT(ret == sizeof(u64),
__KVM_SYSCALL_ERROR("write()", ret));
}
- for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
- close(fd[f]);
+ for (f = 0, i = intid; i < (u64)intid + num; i++, f++)
+ kvm_close(fd[f]);
}
/* handles the valid case: intid=0xffffffff num=1 */
#define for_each_intid(first, num, tmp, i) \
for ((tmp) = (i) = (first); \
- (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp) < (u64)(first) + (u64)(num); \
(tmp)++, (i)++)
static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
@@ -656,13 +674,13 @@ static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
struct test_args *test_args)
{
kvm_inject_cmd cmd = inject_args->cmd;
- uint32_t intid = inject_args->first_intid;
- uint32_t num = inject_args->num;
+ u32 intid = inject_args->first_intid;
+ u32 num = inject_args->num;
int level = inject_args->level;
bool expect_failure = inject_args->expect_failure;
struct kvm_vm *vm = vcpu->vm;
- uint64_t tmp;
- uint32_t i;
+ u64 tmp;
+ u32 i;
/* handles the valid case: intid=0xffffffff num=1 */
assert(intid < UINT_MAX - num || num == 1);
@@ -714,7 +732,7 @@ static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
struct kvm_inject_args *args)
{
struct kvm_inject_args *kvm_args_hva;
- vm_vaddr_t kvm_args_gva;
+ gva_t kvm_args_gva;
kvm_args_gva = uc->args[1];
kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
@@ -728,14 +746,14 @@ static void print_args(struct test_args *args)
args->eoi_split);
}
-static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+static void test_vgic(u32 nr_irqs, bool level_sensitive, bool eoi_split)
{
struct ucall uc;
int gic_fd;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_inject_args inject_args;
- vm_vaddr_t args_gva;
+ gva_t args_gva;
struct test_args args = {
.nr_irqs = nr_irqs,
@@ -753,12 +771,11 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_init_descriptor_tables(vcpu);
/* Setup the guest args page (so it gets the args). */
- args_gva = vm_vaddr_alloc_page(vm);
+ args_gva = vm_alloc_page(vm);
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vcpu, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
@@ -786,6 +803,221 @@ done:
kvm_vm_free(vm);
}
+static void guest_code_asym_dir(struct test_args *args, int cpuid)
+{
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ if (cpuid == 0) {
+ u32 intid;
+
+ local_irq_disable();
+
+ gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
+ gic_irq_enable(MIN_SPI);
+ gic_irq_set_pending(MIN_SPI);
+
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(intid, MIN_SPI);
+
+ gic_set_eoi(intid);
+ isb();
+
+ WRITE_ONCE(args->shared_data, MIN_SPI);
+ dsb(ishst);
+
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) == MIN_SPI);
+ GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
+ } else {
+ do {
+ dsb(ishld);
+ } while (READ_ONCE(args->shared_data) != MIN_SPI);
+
+ gic_set_dir(MIN_SPI);
+ isb();
+
+ WRITE_ONCE(args->shared_data, 0);
+ dsb(ishst);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_group_en(struct test_args *args, int cpuid)
+{
+ u32 intid;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(0);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+ /* SGI0 is G0, which is disabled */
+ gic_irq_set_group(0, 0);
+
+ /* Configure all SGIs with decreasing priority */
+ for (intid = 0; intid < MIN_PPI; intid++) {
+ gic_set_priority(intid, (intid + 1) * 8);
+ gic_irq_enable(intid);
+ gic_irq_set_pending(intid);
+ }
+
+ /* Ack and EOI all G1 interrupts */
+ for (int i = 1; i < MIN_PPI; i++) {
+ intid = wait_for_and_activate_irq();
+
+ GUEST_ASSERT(intid < MIN_PPI);
+ gic_set_eoi(intid);
+ isb();
+ }
+
+ /*
+ * Check that SGI0 is still pending, inactive, and that we cannot
+ * ack anything.
+ */
+ GUEST_ASSERT(gic_irq_get_pending(0));
+ GUEST_ASSERT(!gic_irq_get_active(0));
+ GUEST_ASSERT_IAR_EMPTY();
+ GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
+
+ /* Open the G0 gates, and verify we can ack SGI0 */
+ write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
+ isb();
+
+ do {
+ intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
+ } while (intid == IAR_SPURIOUS);
+
+ GUEST_ASSERT(intid == 0);
+ GUEST_DONE();
+}
+
+static void guest_code_timer_spi(struct test_args *args, int cpuid)
+{
+ u32 intid;
+ u64 val;
+
+ gic_init(GIC_V3, 2);
+
+ gic_set_eoi_split(1);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ /* Add a pending SPI so that KVM starts trapping DIR */
+ gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
+ gic_irq_set_pending(MIN_SPI + cpuid);
+
+ /* Configure the timer with a higher priority, make it pending */
+ gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
+
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* Enable both interrupts */
+ gic_irq_enable(MIN_SPI + cpuid);
+ gic_irq_enable(27);
+
+ /* The timer must fire */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ /* Check that we can deassert it */
+ write_sysreg(0, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(!gic_irq_get_pending(27));
+
+ /*
+ * Priority drop, deactivation -- we expect that the host
+ * deactivation will have been effective
+ */
+ gic_set_eoi(27);
+ gic_set_dir(27);
+
+ GUEST_ASSERT(!gic_irq_get_active(27));
+
+ /* Do it one more time */
+ isb();
+ val = read_sysreg(cntvct_el0);
+ write_sysreg(val, cntv_cval_el0);
+ write_sysreg(1, cntv_ctl_el0);
+ isb();
+
+ GUEST_ASSERT(gic_irq_get_pending(27));
+
+ /* The timer must fire again */
+ intid = wait_for_and_activate_irq();
+ GUEST_ASSERT(intid == 27);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void test_vgic_two_cpus(void *gcode)
+{
+ pthread_t thr[2];
+ struct kvm_vcpu *vcpus[2];
+ struct test_args args = {};
+ struct kvm_vm *vm;
+ gva_t args_gva;
+ int gic_fd, ret;
+
+ vm = vm_create_with_vcpus(2, gcode, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpus[0]);
+ vcpu_init_descriptor_tables(vcpus[1]);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpus[0], 2, args_gva, 0);
+ vcpu_args_set(vcpus[1], 2, args_gva, 1);
+
+ gic_fd = vgic_v3_setup(vm, 2, 64);
+
+ ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
+ ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
+ if (ret)
+ TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
+
+ pthread_join(thr[0], NULL);
+ pthread_join(thr[1], NULL);
+
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
static void help(const char *name)
{
printf(
@@ -802,12 +1034,15 @@ static void help(const char *name)
int main(int argc, char **argv)
{
- uint32_t nr_irqs = 64;
+ u32 nr_irqs = 64;
bool default_args = true;
bool level_sensitive = false;
int opt;
bool eoi_split = false;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ test_disable_default_vgic();
+
while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
switch (opt) {
case 'n':
@@ -839,6 +1074,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ test_vgic_two_cpus(guest_code_asym_dir);
+ test_vgic_two_cpus(guest_code_group_en);
+ test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}
diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index fc4fe52fb6f8..d64d434d3f06 100644
--- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
@@ -23,11 +23,11 @@
#define GIC_LPI_OFFSET 8192
static size_t nr_iterations = 1000;
-static vm_paddr_t gpa_base;
+static gpa_t gpa_base;
static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
-static int gic_fd, its_fd;
+static int its_fd;
static struct test_data {
bool request_vcpus_stop;
@@ -35,14 +35,14 @@ static struct test_data {
u32 nr_devices;
u32 nr_event_ids;
- vm_paddr_t device_table;
- vm_paddr_t collection_table;
- vm_paddr_t cmdq_base;
+ gpa_t device_table;
+ gpa_t collection_table;
+ gpa_t cmdq_base;
void *cmdq_base_va;
- vm_paddr_t itt_tables;
+ gpa_t itt_tables;
- vm_paddr_t lpi_prop_table;
- vm_paddr_t lpi_pend_tables;
+ gpa_t lpi_prop_table;
+ gpa_t lpi_pend_tables;
} test_data = {
.nr_cpus = 1,
.nr_devices = 1,
@@ -73,7 +73,7 @@ static void guest_setup_its_mappings(void)
/* Round-robin the LPIs to all of the vCPUs in the VM */
coll_id = 0;
for (device_id = 0; device_id < nr_devices; device_id++) {
- vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+ gpa_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
itt_base, SZ_64K, true);
@@ -118,11 +118,16 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
+
+ /* SYNC to ensure ITS setup is complete */
+ for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
+ its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)
{
guest_setup_gic();
+ local_irq_enable();
GUEST_SYNC(0);
@@ -183,7 +188,7 @@ static void setup_test_data(void)
size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
u32 nr_devices = test_data.nr_devices;
u32 nr_cpus = test_data.nr_cpus;
- vm_paddr_t cmdq_base;
+ gpa_t cmdq_base;
test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
gpa_base,
@@ -214,15 +219,12 @@ static void setup_test_data(void)
static void setup_gic(void)
{
- gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
- __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
-
its_fd = vgic_its_setup(vm);
}
static void signal_lpi(u32 device_id, u32 event_id)
{
- vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+ gpa_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
struct kvm_msi msi = {
.address_lo = db_addr,
@@ -334,7 +336,7 @@ static void setup_vm(void)
{
int i;
- vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *));
TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
@@ -355,7 +357,6 @@ static void setup_vm(void)
static void destroy_vm(void)
{
close(its_fd);
- close(gic_fd);
kvm_vm_free(vm);
free(vcpus);
}
@@ -374,6 +375,8 @@ int main(int argc, char **argv)
u32 nr_threads;
int c;
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+
while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
switch (c) {
case 'v':
diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c
new file mode 100644
index 000000000000..d785b660d847
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include <arm64/gic_v5.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS 1
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ u32 gic_dev_type;
+};
+
+static u64 max_phys_size;
+
+#define GUEST_CMD_IRQ_CDIA 10
+#define GUEST_CMD_IRQ_DIEOI 11
+#define GUEST_CMD_IS_AWAKE 12
+#define GUEST_CMD_IS_READY 13
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ bool valid;
+ u32 hwirq;
+ u64 ia;
+ static int count;
+
+ /*
+ * We have pending interrupts. Should never actually enter WFI
+ * here!
+ */
+ wfi();
+ GUEST_SYNC(GUEST_CMD_IS_AWAKE);
+
+ ia = gicr_insn(CDIA);
+ valid = GICV5_GICR_CDIA_VALID(ia);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_CDIA);
+
+ if (!valid)
+ return;
+
+ gsb_ack();
+ isb();
+
+ hwirq = FIELD_GET(GICV5_GICR_CDIA_INTID, ia);
+
+ gic_insn(hwirq, CDDI);
+ gic_insn(0, CDEOI);
+
+ GUEST_SYNC(GUEST_CMD_IRQ_DIEOI);
+
+ if (++count >= 2)
+ GUEST_DONE();
+
+ /* Ask for the next interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+}
+
+static void guest_code(void)
+{
+ local_irq_disable();
+
+ gicv5_cpu_enable_interrupts();
+ local_irq_enable();
+
+ /* Enable the SW_PPI (3) */
+ write_sysreg_s(BIT_ULL(3), SYS_ICC_PPI_ENABLER0_EL1);
+
+ /* Ask for the first interrupt to be injected */
+ GUEST_SYNC(GUEST_CMD_IS_READY);
+
+ /* Loop forever waiting for interrupts */
+ while (1);
+}
+
+
+/* we don't want to assert on run execution, hence that helper */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+static void test_vgic_v5_ppis(u32 gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct ucall uc;
+ u64 user_ppis[2];
+ struct vm_gic v;
+ int ret, i;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = __vm_create(VM_SHAPE_DEFAULT, NR_VCPUS, 0);
+
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ vm_init_descriptor_tables(v.vm);
+ vm_install_exception_handler(v.vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ for (i = 0; i < NR_VCPUS; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Read out the PPIs that user space is allowed to drive. */
+ kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_USERSPACE_PPIS, &user_ppis);
+
+ /* We should always be able to drive the SW_PPI. */
+ TEST_ASSERT(user_ppis[0] & BIT(GICV5_ARCH_PPI_SW_PPI),
+ "SW_PPI is not drivable by userspace");
+
+ while (1) {
+ ret = run_vcpu(vcpus[0]);
+
+ switch (get_ucall(vcpus[0], &uc)) {
+ case UCALL_SYNC:
+ /*
+ * The guest is ready for the next level change. Set
+ * high if ready, and lower if it has been consumed.
+ */
+ if (uc.args[1] == GUEST_CMD_IS_READY ||
+ uc.args[1] == GUEST_CMD_IRQ_DIEOI) {
+ u64 irq;
+ bool level = uc.args[1] == GUEST_CMD_IRQ_DIEOI ? 0 : 1;
+
+ irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3);
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ _kvm_irq_line(v.vm, irq, level);
+ } else if (uc.args[1] == GUEST_CMD_IS_AWAKE) {
+ pr_info("Guest skipping WFI due to pending IRQ\n");
+ } else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) {
+ pr_info("Guest acknowledged IRQ\n");
+ }
+
+ continue;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ TEST_ASSERT(ret == 0, "Failed to test GICv5 PPIs");
+
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V5).
+ */
+int test_kvm_device(u32 gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+ /* try to create a non existing KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(u32 gic_dev_type)
+{
+ pr_info("Test VGICv5 PPIs\n");
+ test_vgic_v5_ppis(gic_dev_type);
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+
+ test_disable_default_vgic();
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5);
+ if (ret) {
+ pr_info("No GICv5 support; Not running GIC_v5 tests.\n");
+ exit(KSFT_SKIP);
+ }
+
+ pr_info("Running VGIC_V5 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V5);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f16b3b27e32e..22223395969e 100644
--- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -28,31 +28,25 @@
struct vpmu_vm {
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- int gic_fd;
};
static struct vpmu_vm vpmu_vm;
struct pmreg_sets {
- uint64_t set_reg_id;
- uint64_t clr_reg_id;
+ u64 set_reg_id;
+ u64 clr_reg_id;
};
#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
-static uint64_t get_pmcr_n(uint64_t pmcr)
+static u64 get_pmcr_n(u64 pmcr)
{
return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
-static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+static u64 get_counters_mask(u64 n)
{
- u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
-}
-
-static uint64_t get_counters_mask(uint64_t n)
-{
- uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+ u64 mask = BIT(ARMV8_PMU_CYCLE_IDX);
if (n)
mask |= GENMASK(n - 1, 0);
@@ -95,7 +89,7 @@ static inline void write_sel_evtyper(int sel, unsigned long val)
static void pmu_disable_reset(void)
{
- uint64_t pmcr = read_sysreg(pmcr_el0);
+ u64 pmcr = read_sysreg(pmcr_el0);
/* Reset all counters, disabling them */
pmcr &= ~ARMV8_PMU_PMCR_E;
@@ -175,7 +169,7 @@ struct pmc_accessor pmc_accessors[] = {
#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
{ \
- uint64_t _tval = read_sysreg(regname); \
+ u64 _tval = read_sysreg(regname); \
\
if (set_expected) \
__GUEST_ASSERT((_tval & mask), \
@@ -191,7 +185,7 @@ struct pmc_accessor pmc_accessors[] = {
* Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
* are set or cleared as specified in @set_expected.
*/
-static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+static void check_bitmap_pmu_regs(u64 mask, bool set_expected)
{
GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
@@ -213,7 +207,7 @@ static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
*/
static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
{
- uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+ u64 pmcr_n, test_bit = BIT(pmc_idx);
bool set_expected = false;
if (set_op) {
@@ -238,7 +232,7 @@ static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
*/
static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
{
- uint64_t write_data, read_data;
+ u64 write_data, read_data;
/* Disable all PMCs and reset all PMCs to zero. */
pmu_disable_reset();
@@ -293,11 +287,11 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
}
#define INVALID_EC (-1ul)
-uint64_t expected_ec = INVALID_EC;
+u64 expected_ec = INVALID_EC;
static void guest_sync_handler(struct ex_regs *regs)
{
- uint64_t esr, ec;
+ u64 esr, ec;
esr = read_sysreg(esr_el1);
ec = ESR_ELx_EC(esr);
@@ -357,9 +351,9 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
* if reading/writing PMU registers for implemented or unimplemented
* counters works as expected.
*/
-static void guest_code(uint64_t expected_pmcr_n)
+static void guest_code(u64 expected_pmcr_n)
{
- uint64_t pmcr, pmcr_n, unimp_mask;
+ u64 pmcr, pmcr_n, unimp_mask;
int i, pmc;
__GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
@@ -408,16 +402,12 @@ static void guest_code(uint64_t expected_pmcr_n)
static void create_vpmu_vm(void *guest_code)
{
struct kvm_vcpu_init init;
- uint8_t pmuver, ec;
- uint64_t dfr0, irq = 23;
+ u8 pmuver, ec;
+ u64 dfr0, irq = 23;
struct kvm_device_attr irq_attr = {
.group = KVM_ARM_VCPU_PMU_V3_CTRL,
.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
- .addr = (uint64_t)&irq,
- };
- struct kvm_device_attr init_attr = {
- .group = KVM_ARM_VCPU_PMU_V3_CTRL,
- .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+ .addr = (u64)&irq,
};
/* The test creates the vpmu_vm multiple times. Ensure a clean state */
@@ -431,33 +421,29 @@ static void create_vpmu_vm(void *guest_code)
}
/* Create vCPU with PMUv3 */
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
- __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
- "Failed to create vgic-v3, skipping");
+
+ kvm_arch_vm_finalize_vcpus(vpmu_vm.vm);
/* Make sure that PMUv3 support is indicated in the ID register */
dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
- pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0);
TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
"Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
- /* Initialize vPMU */
vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
- vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
}
static void destroy_vpmu_vm(void)
{
- close(vpmu_vm.gic_fd);
kvm_vm_free(vpmu_vm.vm);
}
-static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+static void run_vcpu(struct kvm_vcpu *vcpu, u64 pmcr_n)
{
struct ucall uc;
@@ -475,52 +461,47 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
}
}
-static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail)
{
struct kvm_vcpu *vcpu;
- uint64_t pmcr, pmcr_orig;
+ unsigned int prev;
+ int ret;
create_vpmu_vm(guest_code);
vcpu = vpmu_vm.vcpu;
- pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
- pmcr = pmcr_orig;
+ prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)));
- /*
- * Setting a larger value of PMCR.N should not modify the field, and
- * return a success.
- */
- set_pmcr_n(&pmcr, pmcr_n);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
- pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
+ ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
if (expect_fail)
- TEST_ASSERT(pmcr_orig == pmcr,
- "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
- pmcr, pmcr_n);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded",
+ nr_counters, prev);
else
- TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
- "Failed to update PMCR.N to %lu (received: %lu)",
- pmcr_n, get_pmcr_n(pmcr));
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL);
}
/*
* Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
* and run the test.
*/
-static void run_access_test(uint64_t pmcr_n)
+static void run_access_test(u64 pmcr_n)
{
- uint64_t sp;
+ u64 sp;
struct kvm_vcpu *vcpu;
struct kvm_vcpu_init init;
pr_debug("Test with pmcr_n %lu\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
/* Save the initial sp to restore them later to run the guest again */
- sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
+ sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));
run_vcpu(vcpu, pmcr_n);
@@ -528,12 +509,12 @@ static void run_access_test(uint64_t pmcr_n)
* Reset and re-initialize the vCPU, and run the guest code again to
* check if PMCR_EL0.N is preserved.
*/
- vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
aarch64_vcpu_setup(vcpu, &init);
vcpu_init_descriptor_tables(vcpu);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code);
run_vcpu(vcpu, pmcr_n);
@@ -550,14 +531,14 @@ static struct pmreg_sets validity_check_reg_sets[] = {
* Create a VM, and check if KVM handles the userspace accesses of
* the PMU register sets in @validity_check_reg_sets[] correctly.
*/
-static void run_pmregs_validity_test(uint64_t pmcr_n)
+static void run_pmregs_validity_test(u64 pmcr_n)
{
int i;
struct kvm_vcpu *vcpu;
- uint64_t set_reg_id, clr_reg_id, reg_val;
- uint64_t valid_counters_mask, max_counters_mask;
+ u64 set_reg_id, clr_reg_id, reg_val;
+ u64 valid_counters_mask, max_counters_mask;
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
valid_counters_mask = get_counters_mask(pmcr_n);
@@ -607,11 +588,11 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
* the vCPU to @pmcr_n, which is larger than the host value.
* The attempt should fail as @pmcr_n is too big to set for the vCPU.
*/
-static void run_error_test(uint64_t pmcr_n)
+static void run_error_test(u64 pmcr_n)
{
pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
- test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ test_create_vpmu_vm_with_nr_counters(pmcr_n, true);
destroy_vpmu_vm();
}
@@ -619,9 +600,9 @@ static void run_error_test(uint64_t pmcr_n)
* Return the default number of implemented PMU event counters excluding
* the cycle counter (i.e. PMCR_EL0.N value) for the guest.
*/
-static uint64_t get_pmcr_n_limit(void)
+static u64 get_pmcr_n_limit(void)
{
- uint64_t pmcr;
+ u64 pmcr;
create_vpmu_vm(guest_code);
pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
@@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void)
return get_pmcr_n(pmcr);
}
+static bool kvm_supports_nr_counters_attr(void)
+{
+ bool supported;
+
+ create_vpmu_vm(NULL);
+ supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
+ KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS);
+ destroy_vpmu_vm();
+
+ return supported;
+}
+
int main(void)
{
- uint64_t i, pmcr_n;
+ u64 i, pmcr_n;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+ TEST_REQUIRE(kvm_supports_vgic_v3());
+ TEST_REQUIRE(kvm_supports_nr_counters_attr());
pmcr_n = get_pmcr_n_limit();
for (i = 0; i <= pmcr_n; i++) {
diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c
index 60cb25454899..df4ed5e3877c 100644
--- a/tools/testing/selftests/kvm/coalesced_io_test.c
+++ b/tools/testing/selftests/kvm/coalesced_io_test.c
@@ -14,16 +14,16 @@
struct kvm_coalesced_io {
struct kvm_coalesced_mmio_ring *ring;
- uint32_t ring_size;
- uint64_t mmio_gpa;
- uint64_t *mmio;
+ u32 ring_size;
+ u64 mmio_gpa;
+ u64 *mmio;
/*
* x86-only, but define pio_port for all architectures to minimize the
* amount of #ifdeffery and complexity, without having to sacrifice
* verbose error messages.
*/
- uint8_t pio_port;
+ u8 pio_port;
};
static struct kvm_coalesced_io kvm_builtin_io_ring;
@@ -70,13 +70,13 @@ static void guest_code(struct kvm_coalesced_io *io)
static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
struct kvm_coalesced_io *io,
- uint32_t ring_start,
- uint32_t expected_exit)
+ u32 ring_start,
+ u32 expected_exit)
{
const bool want_pio = expected_exit == KVM_EXIT_IO;
struct kvm_coalesced_mmio_ring *ring = io->ring;
struct kvm_run *run = vcpu->run;
- uint32_t pio_value;
+ u32 pio_value;
WRITE_ONCE(ring->first, ring_start);
WRITE_ONCE(ring->last, ring_start);
@@ -88,13 +88,13 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
* data_offset is garbage, e.g. an MMIO gpa.
*/
if (run->exit_reason == KVM_EXIT_IO)
- pio_value = *(uint32_t *)((void *)run + run->io.data_offset);
+ pio_value = *(u32 *)((void *)run + run->io.data_offset);
else
pio_value = 0;
TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write &&
run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 &&
- *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) ||
+ *(u64 *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) ||
(want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port &&
run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 &&
pio_value == io->pio_port + io->ring_size - 1)),
@@ -105,14 +105,14 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu,
want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa,
(want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason,
run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other",
- run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data,
+ run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(u64 *)run->mmio.data,
run->io.port, run->io.direction, run->io.size, run->io.count, pio_value);
}
static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
struct kvm_coalesced_io *io,
- uint32_t ring_start,
- uint32_t expected_exit)
+ u32 ring_start,
+ u32 expected_exit)
{
struct kvm_coalesced_mmio_ring *ring = io->ring;
int i;
@@ -124,18 +124,18 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
ring->first, ring->last, io->ring_size, ring_start);
for (i = 0; i < io->ring_size - 1; i++) {
- uint32_t idx = (ring->first + i) % io->ring_size;
+ u32 idx = (ring->first + i) % io->ring_size;
struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx];
#ifdef __x86_64__
if (i & 1)
TEST_ASSERT(entry->phys_addr == io->pio_port &&
entry->len == 4 && entry->pio &&
- *(uint32_t *)entry->data == io->pio_port + i,
+ *(u32 *)entry->data == io->pio_port + i,
"Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x",
io->pio_port, io->pio_port + i, i,
entry->len, entry->pio ? "PIO" : "MMIO",
- entry->phys_addr, *(uint32_t *)entry->data);
+ entry->phys_addr, *(u32 *)entry->data);
else
#endif
TEST_ASSERT(entry->phys_addr == io->mmio_gpa &&
@@ -143,12 +143,12 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu,
"Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx",
io->mmio_gpa, io->mmio_gpa + i, i,
entry->len, entry->pio ? "PIO" : "MMIO",
- entry->phys_addr, *(uint64_t *)entry->data);
+ entry->phys_addr, *(u64 *)entry->data);
}
}
static void test_coalesced_io(struct kvm_vcpu *vcpu,
- struct kvm_coalesced_io *io, uint32_t ring_start)
+ struct kvm_coalesced_io *io, u32 ring_start)
{
struct kvm_coalesced_mmio_ring *ring = io->ring;
@@ -219,11 +219,11 @@ int main(int argc, char *argv[])
* the MMIO GPA identity mapped in the guest.
*/
.mmio_gpa = 4ull * SZ_1G,
- .mmio = (uint64_t *)(4ull * SZ_1G),
+ .mmio = (u64 *)(4ull * SZ_1G),
.pio_port = 0x80,
};
- virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1);
+ virt_map(vm, (u64)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1);
sync_global_to_guest(vm, kvm_builtin_io_ring);
vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring);
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 8835fed09e9f..96d874b239eb 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,5 +1,6 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
CONFIG_USERFAULTFD=y
CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 0202b78f8680..302c4923d093 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -24,7 +24,7 @@
#ifdef __NR_userfaultfd
static int nr_vcpus = 1;
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static size_t demand_paging_size;
static char *guest_data_prototype;
@@ -58,7 +58,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
struct uffd_msg *msg)
{
pid_t tid = syscall(__NR_gettid);
- uint64_t addr = msg->arg.pagefault.address;
+ u64 addr = msg->arg.pagefault.address;
struct timespec start;
struct timespec ts_diff;
int r;
@@ -68,7 +68,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
struct uffdio_copy copy;
- copy.src = (uint64_t)guest_data_prototype;
+ copy.src = (u64)guest_data_prototype;
copy.dst = addr;
copy.len = demand_paging_size;
copy.mode = 0;
@@ -138,7 +138,7 @@ struct test_params {
bool partition_vcpu_memory_access;
};
-static void prefault_mem(void *alias, uint64_t len)
+static void prefault_mem(void *alias, u64 len)
{
size_t p;
@@ -154,7 +154,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg;
struct uffd_desc **uffd_descs = NULL;
- uint64_t uffd_region_size;
+ u64 uffd_region_size;
struct timespec start;
struct timespec ts_diff;
double vcpu_paging_rate;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e79817bd0e29..ef779fa91827 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -20,43 +20,11 @@
#include "guest_modes.h"
#include "ucall_common.h"
-#ifdef __aarch64__
-#include "arm64/vgic.h"
-
-static int gic_fd;
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
- /*
- * The test can still run even if hardware does not support GICv3, as it
- * is only an optimization to reduce guest exits.
- */
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
- if (gic_fd > 0)
- close(gic_fd);
-}
-
-#else /* __aarch64__ */
-
-static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
-{
-}
-
-static void arch_cleanup_vm(struct kvm_vm *vm)
-{
-}
-
-#endif
-
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
static int nr_vcpus = 1;
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static bool run_vcpus_while_disabling_dirty_logging;
/* Host variables */
@@ -69,7 +37,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
struct kvm_vcpu *vcpu = vcpu_args->vcpu;
int vcpu_idx = vcpu_args->vcpu_idx;
- uint64_t pages_count = 0;
+ u64 pages_count = 0;
struct kvm_run *run;
struct timespec start;
struct timespec ts_diff;
@@ -125,11 +93,11 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
struct test_params {
unsigned long iterations;
- uint64_t phys_offset;
+ u64 phys_offset;
bool partition_vcpu_memory_access;
enum vm_mem_backing_src_type backing_src;
int slots;
- uint32_t write_percent;
+ u32 write_percent;
bool random_access;
};
@@ -138,9 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vm *vm;
unsigned long **bitmaps;
- uint64_t guest_num_pages;
- uint64_t host_num_pages;
- uint64_t pages_per_slot;
+ u64 guest_num_pages;
+ u64 host_num_pages;
+ u64 pages_per_slot;
struct timespec start;
struct timespec ts_diff;
struct timespec get_dirty_log_total = (struct timespec){0};
@@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
dirty_log_manual_caps);
- arch_setup_vm(vm, nr_vcpus);
-
/* Start the iterations */
iteration = 0;
host_quit = false;
@@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
memstress_free_bitmaps(bitmaps, p->slots);
- arch_cleanup_vm(vm);
memstress_destroy_vm(vm);
}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..12446a4b6e8d 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
@@ -71,23 +74,25 @@
* the host. READ/WRITE_ONCE() should also be used with anything
* that may change.
*/
-static uint64_t host_page_size;
-static uint64_t guest_page_size;
-static uint64_t guest_num_pages;
-static uint64_t iteration;
+static u64 host_page_size;
+static u64 guest_page_size;
+static u64 guest_num_pages;
+static u64 iteration;
+static u64 nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
* This will be set to the topmost valid physical address minus
* the test memory size.
*/
-static uint64_t guest_test_phys_mem;
+static u64 guest_test_phys_mem;
/*
* Guest virtual memory offset of the testing memory slot.
* Must not conflict with identity mapped test code.
*/
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
/*
* Continuously write to the first 8 bytes of a random pages within
@@ -95,8 +100,10 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
*/
static void guest_code(void)
{
- uint64_t addr;
- int i;
+ u64 addr;
+
+#ifdef __s390x__
+ u64 i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -106,17 +113,20 @@ static void guest_code(void)
*/
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
- vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
- vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
GUEST_SYNC(1);
@@ -128,30 +138,23 @@ static bool host_quit;
/* Points to the test VM memory region on which we track dirty logs */
static void *host_test_mem;
-static uint64_t host_num_pages;
+static u64 host_num_pages;
/* For statistics only */
-static uint64_t host_dirty_count;
-static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
+static u64 host_dirty_count;
+static u64 host_clear_count;
/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread. It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
* sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static u64 dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static u64 dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -189,25 +236,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
-static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
+static u32 test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
static bool clear_log_supported(void)
{
@@ -226,15 +255,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm)
}
static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
- void *bitmap, uint32_t num_pages,
- uint32_t *unused)
+ void *bitmap, u32 num_pages,
+ u32 *unused)
{
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
}
static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
- void *bitmap, uint32_t num_pages,
- uint32_t *unused)
+ void *bitmap, u32 num_pages,
+ u32 *unused)
{
kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
/* Should only be called after a GUEST_SYNC */
static void vcpu_handle_sync_stop(void)
{
- if (atomic_read(&vcpu_sync_stop_requested)) {
- /* It means main thread is sleeping waiting */
- atomic_set(&vcpu_sync_stop_requested, false);
+ if (READ_ONCE(vcpu_stop)) {
sem_post(&sem_vcpu_stop);
- sem_wait_until(&sem_vcpu_cont);
+ sem_wait(&sem_vcpu_cont);
}
}
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
-
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
@@ -273,8 +297,8 @@ static bool dirty_ring_supported(void)
static void dirty_ring_create_vm_done(struct kvm_vm *vm)
{
- uint64_t pages;
- uint32_t limit;
+ u64 pages;
+ u32 limit;
/*
* We rely on vcpu exit due to full dirty ring state. Adjust
@@ -309,12 +333,12 @@ static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET);
}
-static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
- int slot, void *bitmap,
- uint32_t num_pages, uint32_t *fetch_index)
+static u32 dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+ int slot, void *bitmap,
+ u32 num_pages, u32 *fetch_index)
{
struct kvm_dirty_gfn *cur;
- uint32_t count = 0;
+ u32 count = 0;
while (true) {
cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
__set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
-{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&sem_vcpu_cont);
-}
-
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
- void *bitmap, uint32_t num_pages,
- uint32_t *ring_buf_idx)
+ void *bitmap, u32 num_pages,
+ u32 *ring_buf_idx)
{
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ u32 count, cleared;
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&sem_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&sem_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
"exit_reason=%s",
@@ -423,10 +404,10 @@ struct log_mode {
void (*create_vm_done)(struct kvm_vm *vm);
/* Hook to collect the dirty pages into the bitmap provided */
void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
- void *bitmap, uint32_t num_pages,
- uint32_t *ring_buf_idx);
+ void *bitmap, u32 num_pages,
+ u32 *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -487,8 +459,8 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
}
static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
- void *bitmap, uint32_t num_pages,
- uint32_t *ring_buf_idx)
+ void *bitmap, u32 num_pages,
+ u32 *ring_buf_idx)
{
struct log_mode *mode = &log_modes[host_log_mode];
@@ -497,174 +469,113 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vcpu, ret, err);
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret;
struct kvm_vcpu *vcpu = data;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = __vcpu_run(vcpu);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vcpu, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
- uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
+ u64 page, nr_dirty_pages = 0, nr_clean_pages = 0;
+ u64 step = vm_num_host_pages(mode, 1);
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (__test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
+ u64 val = *(u64 *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- if (__test_and_clear_bit_le(page, bmap)) {
- bool matched;
-
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- __set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages, void *guest_code)
+ u64 extra_mem_pages, void *guest_code)
{
struct kvm_vm *vm;
@@ -674,13 +585,14 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
log_mode_create_vm_done(vm);
*vcpu = vm_vcpu_add(vm, 0, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
struct test_params {
unsigned long iterations;
unsigned long interval;
- uint64_t phys_offset;
+ u64 phys_offset;
};
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -688,8 +600,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
- uint32_t ring_buf_idx = 0;
+ unsigned long *bmap[2];
+ u32 ring_buf_idx = 0;
int sem_val;
if (!log_mode_supported()) {
@@ -729,14 +641,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_zalloc(host_num_pages);
- host_bmap_track = bitmap_zalloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -749,7 +667,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
- host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
+ host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem);
/* Export the shared variables to the guest */
sync_global_to_guest(vm, host_page_size);
@@ -757,14 +675,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
- WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ WRITE_ONCE(host_quit, false);
/*
* Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +689,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages,
- &ring_buf_idx);
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
+
+ sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
/*
- * See vcpu_sync_stop_requested definition for details on why
- * we need to stop vcpu when verify data.
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
*/
- atomic_set(&vcpu_sync_stop_requested, true);
- sem_wait_until(&sem_vcpu_stop);
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
+
+ /*
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
+ */
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
/*
* NOTE: for dirty ring, it's possible that we didn't stop at
* GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +785,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the flush of the last page, and since we handle the last
* page specially verification will succeed anyway.
*/
- assert(host_log_mode == LOG_MODE_DIRTY_RING ||
- atomic_read(&vcpu_sync_stop_requested) == false);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
-
- /*
- * Set host_quit before sem_vcpu_cont in the final iteration to
- * ensure that the vCPU worker doesn't resume the guest. As
- * above, the dirty ring test may stop and wait even when not
- * explicitly request to do so, i.e. would hang waiting for a
- * "continue" if it's allowed to resume the guest.
- */
- if (++iteration == p->iterations)
- WRITE_ONCE(host_quit, true);
-
- sem_post(&sem_vcpu_cont);
- sync_global_to_guest(vm, iteration);
}
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -857,7 +834,6 @@ int main(int argc, char *argv[])
.interval = TEST_HOST_LOOP_INTERVAL,
};
int opt, i;
- sigset_t sigset;
sem_init(&sem_vcpu_stop, 0, 0);
sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +884,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
- /* Ensure that vCPU threads start with SIG_IPI blocked. */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
index 91f05f78e824..216f10644c1a 100644
--- a/tools/testing/selftests/kvm/get-reg-list.c
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
}
#ifdef __aarch64__
-static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c,
+ struct kvm_vcpu_init *init)
{
struct vcpu_reg_sublist *s;
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init);
+
for_each_sublist(c, s)
if (s->capability)
init->features[s->feature / 32] |= 1 << (s->feature % 32);
@@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini
static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
{
- struct kvm_vcpu_init init = { .target = -1, };
+ struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
- prepare_vcpu_init(c, &init);
+ prepare_vcpu_init(vm, c, &init);
vcpu = __vm_vcpu_add(vm, 0);
aarch64_vcpu_setup(vcpu, &init);
@@ -213,7 +216,7 @@ static void run_test(struct vcpu_reg_list *c)
* since we don't know the capabilities of any new registers.
*/
for_each_present_blessed_reg(i) {
- uint8_t addr[2048 / 8];
+ u8 addr[2048 / 8];
struct kvm_one_reg reg = {
.id = reg_list->reg[i],
.addr = (__u64)&addr,
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ce687f8d248f..832ef4dfb99f 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -13,14 +13,19 @@
#include <linux/bitmap.h>
#include <linux/falloc.h>
-#include <sys/mman.h>
+#include <linux/sizes.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include "kvm_syscalls.h"
#include "kvm_util.h"
+#include "numaif.h"
#include "test_util.h"
+#include "ucall_common.h"
-static void test_file_read_write(int fd)
+static size_t page_size;
+
+static void test_file_read_write(int fd, size_t total_size)
{
char buf[64];
@@ -34,15 +39,235 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap(int fd, size_t page_size)
+static void test_mmap_cow(int fd, size_t size)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
+}
+
+static void test_mmap_supported(int fd, size_t total_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+ int ret;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
+ page_size);
+ TEST_ASSERT(!ret, "fallocate the first page should succeed.");
+
+ for (i = 0; i < page_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
+ for (; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ memset(mem, val, page_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_mbind(int fd, size_t total_size)
+{
+ const unsigned long nodemask_0 = 1; /* nid: 0 */
+ unsigned long nodemask = 0;
+ unsigned long maxnode = BITS_PER_TYPE(nodemask);
+ int policy;
+ char *mem;
+ int ret;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ /* Test MPOL_INTERLEAVE policy */
+ kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
+ "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
+
+ /* Test basic MPOL_BIND policy */
+ kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
+ "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
+ MPOL_BIND, nodemask_0, policy, nodemask);
+
+ /* Test MPOL_DEFAULT policy */
+ kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
+ kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
+ TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
+ "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
+ MPOL_DEFAULT, policy, nodemask);
+
+ /* Test with invalid policy */
+ ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "mbind with invalid policy should fail with EINVAL");
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_numa_allocation(int fd, size_t total_size)
+{
+ unsigned long node0_mask = 1; /* Node 0 */
+ unsigned long node1_mask = 2; /* Node 1 */
+ unsigned long maxnode = 8;
+ void *pages[4];
+ int status[4];
+ char *mem;
+ int i;
+
+ if (!is_multi_numa_node_system())
+ return;
+
+ mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ for (i = 0; i < 4; i++)
+ pages[i] = (char *)mem + page_size * i;
+
+ /* Set NUMA policy after allocation */
+ memset(mem, 0xaa, page_size);
+ kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
+
+ /* Set NUMA policy before allocation */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ /* Validate if pages are allocated on specified NUMA nodes */
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
+
+ /* Punch hole for all pages */
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
+
+ /* Change NUMA policy nodes and reallocate */
+ kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
+ kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
+ memset(mem, 0xaa, total_size);
+
+ kvm_move_pages(0, 4, pages, NULL, status, 0);
+ TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
+ TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
+ TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
+ TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
+
+ kvm_munmap(mem, total_size);
+}
+
+static void test_collapse(int fd, u64 flags)
+{
+ const size_t pmd_size = get_trans_hugepagesz();
+ void *reserved_addr;
+ void *aligned_addr;
+ char *mem;
+ off_t i;
+
+ /*
+ * To even reach the point where the guest_memfd folios will
+ * get collapsed, both the userspace address and the offset
+ * within the guest_memfd have to be aligned to pmd_size.
+ *
+ * To achieve that alignment, reserve virtual address space
+ * with regular mmap, then use MAP_FIXED to allocate memory
+ * from a pmd_size-aligned offset (0) at a known, available
+ * virtual address.
+ */
+ reserved_addr = kvm_mmap(pmd_size * 2, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
+ aligned_addr = align_ptr_up(reserved_addr, pmd_size);
+
+ mem = mmap(aligned_addr, pmd_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_SHARED, fd, 0);
+ TEST_ASSERT(IS_ALIGNED((u64)mem, pmd_size),
+ "Userspace address must be aligned to PMD size.");
+
+ /*
+ * Use reads to populate page table to avoid setting dirty
+ * flag on page.
+ */
+ for (i = 0; i < pmd_size; i += getpagesize())
+ READ_ONCE(mem[i]);
+
+ /*
+ * Advising the use of huge pages in guest_memfd should be
+ * fine...
+ */
+ kvm_madvise(mem, pmd_size, MADV_HUGEPAGE);
+
+ /*
+ * ... but collapsing folios must not be supported to avoid
+ * mapping beyond shared ranges into host userspace page
+ * tables.
+ */
+ TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+
+ /*
+ * Removing from host page tables and re-faulting should be
+ * fine; should not end up faulting in a collapsed/huge folio.
+ */
+ kvm_madvise(mem, pmd_size, MADV_DONTNEED);
+ READ_ONCE(mem[0]);
+
+ kvm_munmap(reserved_addr, pmd_size * 2);
+}
+
+static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
+{
+ const char val = 0xaa;
+ char *mem;
+ size_t i;
+
+ mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+
+ TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
+ TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
+
+ for (i = 0; i < accessible_size; i++)
+ TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
+
+ kvm_munmap(mem, map_size);
+}
+
+static void test_fault_overflow(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, total_size, total_size * 4);
+}
+
+static void test_fault_private(int fd, size_t total_size)
+{
+ test_fault_sigbus(fd, 0, total_size);
+}
+
+static void test_mmap_not_supported(int fd, size_t total_size)
{
char *mem;
mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
TEST_ASSERT_EQ(mem, MAP_FAILED);
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
}
-static void test_file_size(int fd, size_t page_size, size_t total_size)
+static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
int ret;
@@ -53,7 +278,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
-static void test_fallocate(int fd, size_t page_size, size_t total_size)
+static void test_fallocate(int fd, size_t total_size)
{
int ret;
@@ -90,7 +315,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size)
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
}
-static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+static void test_invalid_punch_hole(int fd, size_t total_size)
{
struct {
off_t offset;
@@ -120,26 +345,18 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
}
}
-static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
+static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
+ u64 guest_memfd_flags)
{
- size_t page_size = getpagesize();
- uint64_t flag;
size_t size;
int fd;
for (size = 1; size < page_size; size++) {
- fd = __vm_create_guest_memfd(vm, size, 0);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
+ fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
"guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
size);
}
-
- for (flag = BIT(0); flag; flag <<= 1) {
- fd = __vm_create_guest_memfd(vm, page_size, flag);
- TEST_ASSERT(fd == -1 && errno == EINVAL,
- "guest_memfd() with flag '0x%lx' should fail with EINVAL",
- flag);
- }
}
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
@@ -147,53 +364,198 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
int fd1, fd2, ret;
struct stat st1, st2;
- fd1 = __vm_create_guest_memfd(vm, 4096, 0);
+ fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
- fd2 = __vm_create_guest_memfd(vm, 8192, 0);
+ fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
ret = fstat(fd2, &st2);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size");
+ TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
ret = fstat(fd1, &st1);
TEST_ASSERT(ret != -1, "memfd fstat should succeed");
- TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
+ TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
close(fd2);
close(fd1);
}
-int main(int argc, char *argv[])
+static void test_guest_memfd_flags(struct kvm_vm *vm)
{
- size_t page_size;
- size_t total_size;
+ u64 valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ u64 flag;
int fd;
- struct kvm_vm *vm;
- TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+ for (flag = BIT(0); flag; flag <<= 1) {
+ fd = __vm_create_guest_memfd(vm, page_size, flag);
+ if (flag & valid_flags) {
+ TEST_ASSERT(fd >= 0,
+ "guest_memfd() with flag '0x%lx' should succeed",
+ flag);
+ close(fd);
+ } else {
+ TEST_ASSERT(fd < 0 && errno == EINVAL,
+ "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+ flag);
+ }
+ }
+}
- page_size = getpagesize();
- total_size = page_size * 4;
+#define __gmem_test(__test, __vm, __flags, __gmem_size) \
+do { \
+ int fd = vm_create_guest_memfd(__vm, __gmem_size, __flags); \
+ \
+ test_##__test(fd, __gmem_size); \
+ close(fd); \
+} while (0)
- vm = vm_create_barebones();
+#define gmem_test(__test, __vm, __flags) \
+ __gmem_test(__test, __vm, __flags, page_size * 4)
- test_create_guest_memfd_invalid(vm);
+static void __test_guest_memfd(struct kvm_vm *vm, u64 flags)
+{
test_create_guest_memfd_multiple(vm);
+ test_create_guest_memfd_invalid_sizes(vm, flags);
+
+ gmem_test(file_read_write, vm, flags);
+
+ if (flags & GUEST_MEMFD_FLAG_MMAP) {
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
+ size_t pmd_size = get_trans_hugepagesz();
+
+ gmem_test(mmap_supported, vm, flags);
+ gmem_test(fault_overflow, vm, flags);
+ gmem_test(numa_allocation, vm, flags);
+ __gmem_test(collapse, vm, flags, pmd_size);
+ } else {
+ gmem_test(fault_private, vm, flags);
+ }
+
+ gmem_test(mmap_cow, vm, flags);
+ gmem_test(mbind, vm, flags);
+ } else {
+ gmem_test(mmap_not_supported, vm, flags);
+ }
+
+ gmem_test(file_size, vm, flags);
+ gmem_test(fallocate, vm, flags);
+ gmem_test(invalid_punch_hole, vm, flags);
+}
+
+static void test_guest_memfd(unsigned long vm_type)
+{
+ struct kvm_vm *vm = vm_create_barebones_type(vm_type);
+ u64 flags;
- fd = vm_create_guest_memfd(vm, total_size, 0);
+ test_guest_memfd_flags(vm);
- test_file_read_write(fd);
- test_mmap(fd, page_size);
- test_file_size(fd, page_size, total_size);
- test_fallocate(fd, page_size, total_size);
- test_invalid_punch_hole(fd, page_size, total_size);
+ __test_guest_memfd(vm, 0);
+
+ flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
+ if (flags & GUEST_MEMFD_FLAG_MMAP)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
+
+ /* MMAP should always be supported if INIT_SHARED is supported. */
+ if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
+ __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code(u8 *mem, u64 size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ __GUEST_ASSERT(mem[i] == 0xaa,
+ "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);
+
+ memset(mem, 0xff, size);
+ GUEST_DONE();
+}
+
+static void test_guest_memfd_guest(void)
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables, and low memory contains
+ * the PCI hole and other MMIO regions that need to be avoided.
+ */
+ const gpa_t gpa = SZ_4G;
+ const int slot = 1;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ u8 *mem;
+ size_t size;
+ int fd, i;
+
+ if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
+ return;
+
+ vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
+
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
+ "Default VM type should support MMAP, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+ TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
+ "Default VM type should support INIT_SHARED, supported flags = 0x%x",
+ vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
+
+ /*
+ * Use the max of the host or guest page size for all operations, as
+ * KVM requires guest_memfd files and memslots to be sized to multiples
+ * of the host page size.
+ */
+ size = max_t(size_t, vm->page_size, page_size);
+ fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
+ GUEST_MEMFD_FLAG_INIT_SHARED);
+ vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ memset(mem, 0xaa, size);
+ kvm_munmap(mem, size);
+
+ virt_map(vm, gpa, gpa, size / vm->page_size);
+ vcpu_args_set(vcpu, 2, gpa, size);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
+ for (i = 0; i < size; i++)
+ TEST_ASSERT_EQ(mem[i], 0xff);
close(fd);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long vm_types, vm_type;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+
+ page_size = getpagesize();
+
+ /*
+ * Not all architectures support KVM_CAP_VM_TYPES. However, those that
+ * support guest_memfd have that support for the default VM type.
+ */
+ vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
+ if (!vm_types)
+ vm_types = BIT(VM_TYPE_DEFAULT);
+
+ for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
+ test_guest_memfd(vm_type);
+
+ test_guest_memfd_guest();
}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
index bcf582852db9..79d3fc326e91 100644
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -16,22 +16,22 @@
#include "ucall_common.h"
struct guest_vals {
- uint64_t a;
- uint64_t b;
- uint64_t type;
+ u64 a;
+ u64 b;
+ u64 type;
};
static struct guest_vals vals;
/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
#define TYPE_LIST \
-TYPE(test_type_i64, I64, "%ld", int64_t) \
-TYPE(test_type_u64, U64u, "%lu", uint64_t) \
-TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \
-TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \
-TYPE(test_type_u32, U32u, "%u", uint32_t) \
-TYPE(test_type_x32, U32x, "0x%x", uint32_t) \
-TYPE(test_type_X32, U32X, "0x%X", uint32_t) \
+TYPE(test_type_i64, I64, "%ld", s64) \
+TYPE(test_type_u64, U64u, "%lu", u64) \
+TYPE(test_type_x64, U64x, "0x%lx", u64) \
+TYPE(test_type_X64, U64X, "0x%lX", u64) \
+TYPE(test_type_u32, U32u, "%u", u32) \
+TYPE(test_type_x32, U32x, "0x%x", u32) \
+TYPE(test_type_X32, U32X, "0x%X", u32) \
TYPE(test_type_int, INT, "%d", int) \
TYPE(test_type_char, CHAR, "%c", char) \
TYPE(test_type_str, STR, "'%s'", const char *) \
@@ -56,7 +56,7 @@ static void fn(struct kvm_vcpu *vcpu, T a, T b) \
\
snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
- vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \
+ vals = (struct guest_vals){ (u64)a, (u64)b, TYPE_##ext }; \
sync_global_to_guest(vcpu->vm, vals); \
run_test(vcpu, expected_printf, expected_assert); \
}
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 94bd6ed24cf3..3147f5c97e94 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -80,7 +80,7 @@ static inline void check_join(pthread_t thread, void **retval)
TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
}
-static void run_test(uint32_t run)
+static void run_test(u32 run)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -88,7 +88,7 @@ static void run_test(uint32_t run)
pthread_t threads[VCPU_NUM];
pthread_t throw_away;
void *b;
- uint32_t i, j;
+ u32 i, j;
CPU_ZERO(&cpu_set);
for (i = 0; i < VCPU_NUM; i++)
@@ -149,7 +149,7 @@ void wait_for_child_setup(pid_t pid)
int main(int argc, char **argv)
{
- uint32_t i;
+ u32 i;
int s, r;
pid_t pid;
diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
index bf461de34785..a5836d4ab7ee 100644
--- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
@@ -18,20 +18,20 @@ enum arch_timer {
#define CTL_ISTATUS (1 << 2)
#define msec_to_cycles(msec) \
- (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+ (timer_get_cntfrq() * (u64)(msec) / 1000)
#define usec_to_cycles(usec) \
- (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+ (timer_get_cntfrq() * (u64)(usec) / 1000000)
#define cycles_to_usec(cycles) \
- ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+ ((u64)(cycles) * 1000000 / timer_get_cntfrq())
-static inline uint32_t timer_get_cntfrq(void)
+static inline u32 timer_get_cntfrq(void)
{
return read_sysreg(cntfrq_el0);
}
-static inline uint64_t timer_get_cntct(enum arch_timer timer)
+static inline u64 timer_get_cntct(enum arch_timer timer)
{
isb();
@@ -48,7 +48,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer)
return 0;
}
-static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+static inline void timer_set_cval(enum arch_timer timer, u64 cval)
{
switch (timer) {
case VIRTUAL:
@@ -64,7 +64,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
isb();
}
-static inline uint64_t timer_get_cval(enum arch_timer timer)
+static inline u64 timer_get_cval(enum arch_timer timer)
{
switch (timer) {
case VIRTUAL:
@@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
return 0;
}
-static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
+static inline void timer_set_tval(enum arch_timer timer, s32 tval)
{
switch (timer) {
case VIRTUAL:
@@ -95,7 +95,7 @@ static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
isb();
}
-static inline int32_t timer_get_tval(enum arch_timer timer)
+static inline s32 timer_get_tval(enum arch_timer timer)
{
isb();
switch (timer) {
@@ -111,7 +111,7 @@ static inline int32_t timer_get_tval(enum arch_timer timer)
return 0;
}
-static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+static inline void timer_set_ctl(enum arch_timer timer, u32 ctl)
{
switch (timer) {
case VIRTUAL:
@@ -127,7 +127,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
isb();
}
-static inline uint32_t timer_get_ctl(enum arch_timer timer)
+static inline u32 timer_get_ctl(enum arch_timer timer)
{
switch (timer) {
case VIRTUAL:
@@ -142,17 +142,41 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer)
return 0;
}
-static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+static inline void timer_set_next_cval_ms(enum arch_timer timer, u32 msec)
{
- uint64_t now_ct = timer_get_cntct(timer);
- uint64_t next_ct = now_ct + msec_to_cycles(msec);
+ u64 now_ct = timer_get_cntct(timer);
+ u64 next_ct = now_ct + msec_to_cycles(msec);
timer_set_cval(timer, next_ct);
}
-static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+static inline void timer_set_next_tval_ms(enum arch_timer timer, u32 msec)
{
timer_set_tval(timer, msec_to_cycles(msec));
}
+static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
+static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu)
+{
+ u32 intid;
+ u64 attr;
+
+ attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER :
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER;
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
+
+ return intid;
+}
+
#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
index 329e4f5079ea..6a5d4634af2c 100644
--- a/tools/testing/selftests/kvm/include/arm64/delay.h
+++ b/tools/testing/selftests/kvm/include/arm64/delay.h
@@ -8,10 +8,10 @@
#include "arch_timer.h"
-static inline void __delay(uint64_t cycles)
+static inline void __delay(u64 cycles)
{
enum arch_timer timer = VIRTUAL;
- uint64_t start = timer_get_cntct(timer);
+ u64 start = timer_get_cntct(timer);
while ((timer_get_cntct(timer) - start) < cycles)
cpu_relax();
diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..615745093c98 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
@@ -48,8 +48,8 @@ void gic_set_dir(unsigned int intid);
* split is true, EOI drops the priority and deactivates the interrupt.
*/
void gic_set_eoi_split(bool split);
-void gic_set_priority_mask(uint64_t mask);
-void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_set_priority_mask(u64 mask);
+void gic_set_priority(u32 intid, u32 prio);
void gic_irq_set_active(unsigned int intid);
void gic_irq_clear_active(unsigned int intid);
bool gic_irq_get_active(unsigned int intid);
@@ -57,8 +57,9 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_irq_set_group(unsigned int intid, bool group);
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
- vm_paddr_t pend_table);
+void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size,
+ gpa_t pend_table);
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..a43a407e2d5c 100644
--- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
@@ -5,15 +5,15 @@
#include <linux/sizes.h>
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
- vm_paddr_t device_tbl, size_t device_tbl_sz,
- vm_paddr_t cmdq, size_t cmdq_size);
+void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl,
+ size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size);
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base,
size_t itt_size, bool valid);
void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v5.h b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
new file mode 100644
index 000000000000..eb523d9277cf
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v5.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SELFTESTS_GIC_V5_H
+#define __SELFTESTS_GIC_V5_H
+
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+#include <linux/bitfield.h>
+
+#include "processor.h"
+
+/*
+ * Definitions for GICv5 instructions for the Current Domain
+ */
+#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3)
+#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0)
+#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0)
+#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1)
+#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1)
+#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7)
+#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4)
+#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2)
+#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5)
+#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0)
+#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1)
+
+/* Definitions for GIC CDAFF */
+#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32)
+#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28)
+#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDI */
+#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDDIS */
+#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r)
+#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r)
+
+/* Definitions for GIC CDEN */
+#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDHM */
+#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32)
+#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPEND */
+#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32)
+#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDPRI */
+#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35)
+#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GIC CDRCFG */
+#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0)
+
+/* Definitions for GICR CDIA */
+#define GICV5_GICR_CDIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GICR_CDIA_VALID_MASK, r)
+#define GICV5_GICR_CDIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDIA_ID_MASK GENMASK_ULL(23, 0)
+#define GICV5_GICR_CDIA_INTID GENMASK_ULL(31, 0)
+
+/* Definitions for GICR CDNMIA */
+#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32)
+#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r)
+#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29)
+#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0)
+
+#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn)
+#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn)
+
+#define __GIC_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | \
+ sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \
+ ((Rt) & 0x1f))
+
+#define GSB_SYS_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 0, 31)
+#define GSB_ACK_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 1, 31)
+
+#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory")
+#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory")
+
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+#define GICV5_IRQ_DEFAULT_PRI 0b10000
+
+#define GICV5_ARCH_PPI_SW_PPI 0x3
+
+void gicv5_ppi_priority_init(void)
+{
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR0_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR1_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR2_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR3_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR4_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR5_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR6_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR7_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR8_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR9_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR10_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR11_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR12_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR13_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR14_EL1);
+ write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR15_EL1);
+
+ /*
+ * Context syncronization required to make sure system register writes
+ * effects are synchronised.
+ */
+ isb();
+}
+
+void gicv5_cpu_disable_interrupts(void)
+{
+ u64 cr0;
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+void gicv5_cpu_enable_interrupts(void)
+{
+ u64 cr0, pcr;
+
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1);
+ write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1);
+
+ gicv5_ppi_priority_init();
+
+ pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_DEFAULT_PRI);
+ write_sysreg_s(pcr, SYS_ICC_PCR_EL1);
+
+ cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1);
+ write_sysreg_s(cr0, SYS_ICC_CR0_EL1);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index e43a57d99b56..4a2033708227 100644
--- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
@@ -2,6 +2,11 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
-struct kvm_vm_arch {};
+struct kvm_mmu_arch {};
+
+struct kvm_vm_arch {
+ bool has_gic;
+ int gic_fd;
+};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..b8a902ba8573 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,8 +62,73 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_EPD1_SHIFT 23
+#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_TBI1 (UL(1) << 38)
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
struct ex_regs {
@@ -102,14 +167,8 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
- uint32_t *ipa16k, uint32_t *ipa64k);
+void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k,
+ u32 *ipa16k, u32 *ipa64k);
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
@@ -120,7 +179,8 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level);
+u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva);
static inline void cpu_relax(void)
{
@@ -199,6 +259,16 @@ static inline void local_irq_disable(void)
asm volatile("msr daifset, #3" : : : "memory");
}
+static inline void local_serror_enable(void)
+{
+ asm volatile("msr daifclr, #4" : : : "memory");
+}
+
+static inline void local_serror_disable(void)
+{
+ asm volatile("msr daifset, #4" : : : "memory");
+}
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
@@ -217,9 +287,9 @@ struct arm_smccc_res {
* @res: pointer to write the return values from registers x0-x3
*
*/
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res);
+void smccc_hvc(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, struct arm_smccc_res *res);
/**
* smccc_smc - Invoke a SMCCC function using the smc conduit
@@ -228,11 +298,94 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
* @res: pointer to write the return values from registers x0-x3
*
*/
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res);
+void smccc_smc(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, struct arm_smccc_res *res);
/* Execute a Wait For Interrupt instruction. */
void wfi(void);
+void test_wants_mte(void);
+void test_disable_default_vgic(void);
+
+bool vm_supports_el2(struct kvm_vm *vm);
+
+static inline bool test_supports_el2(void)
+{
+ struct kvm_vm *vm = vm_create(1);
+ bool supported = vm_supports_el2(vm);
+
+ kvm_vm_free(vm);
+ return supported;
+}
+
+static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu)
+{
+ return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
+}
+
+#define MAPPED_EL2_SYSREG(el2, el1) \
+ case SYS_##el1: \
+ if (vcpu_has_el2(vcpu)) \
+ alias = SYS_##el2; \
+ break
+
+
+static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding)
+{
+ u32 alias = encoding;
+
+ BUILD_BUG_ON(!__builtin_constant_p(encoding));
+
+ switch (encoding) {
+ MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1);
+ MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1);
+ MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1);
+ MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1);
+ MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1);
+ MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1);
+ MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1);
+ MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1);
+ MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1);
+ MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1);
+ MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1);
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1);
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1);
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1);
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1);
+ MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1);
+ MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1);
+ MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1);
+ MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1);
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1);
+ MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1);
+ MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1);
+ case SYS_SP_EL1:
+ if (!vcpu_has_el2(vcpu))
+ return ARM64_CORE_REG(sp_el1);
+
+ alias = SYS_SP_EL2;
+ break;
+ default:
+ BUILD_BUG();
+ }
+
+ return KVM_ARM64_SYS_REG(alias);
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init);
+
+static inline unsigned int get_current_el(void)
+{
+ return (read_sysreg(CurrentEL) >> 2) & 0x3;
+}
+
+#define do_smccc(...) \
+do { \
+ if (get_current_el() == 2) \
+ smccc_smc(__VA_ARGS__); \
+ else \
+ smccc_hvc(__VA_ARGS__); \
+} while (0)
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
index 4ec801f37f00..2210d3d94c40 100644
--- a/tools/testing/selftests/kvm/include/arm64/ucall.h
+++ b/tools/testing/selftests/kvm/include/arm64/ucall.h
@@ -10,9 +10,9 @@
* ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
* VM), it must not be accessed from host code.
*/
-extern vm_vaddr_t *ucall_exit_mmio_addr;
+extern gva_t *ucall_exit_mmio_addr;
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+static inline void ucall_arch_do_ucall(gva_t uc)
{
WRITE_ONCE(*ucall_exit_mmio_addr, uc);
}
diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
index c481d0c00a5d..1f8b04373987 100644
--- a/tools/testing/selftests/kvm/include/arm64/vgic.h
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
@@ -11,24 +11,27 @@
#include "kvm_util.h"
#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
- (((uint64_t)(count) << 52) | \
- ((uint64_t)((base) >> 16) << 16) | \
- ((uint64_t)(flags) << 12) | \
+ (((u64)(count) << 52) | \
+ ((u64)((base) >> 16) << 16) | \
+ ((u64)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
+bool kvm_supports_vgic_v3(void);
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs);
+void __vgic_v3_init(int fd);
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs);
#define VGIC_MAX_RESERVED 1023
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+void kvm_irq_set_level_info(int gic_fd, u32 intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level);
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level);
/* The vcpu arg only applies to private interrupts. */
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu);
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
new file mode 100644
index 000000000000..067a4c9cf452
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_SYSCALLS_H
+#define SELFTEST_KVM_SYSCALLS_H
+
+/*
+ * Include both the kernel and libc versions of mman.h. The kernel provides
+ * the most up-to-date flags and definitions, while libc provides the syscall
+ * wrappers tests expect.
+ */
+#include <linux/mman.h>
+
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <test_util.h>
+
+#define MAP_ARGS0(m,...)
+#define MAP_ARGS1(m,t,a,...) m(t,a)
+#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__)
+#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__)
+#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__)
+#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__)
+#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__)
+#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__)
+
+#define __DECLARE_ARGS(t, a) t a
+#define __UNPACK_ARGS(t, a) a
+
+#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args)
+#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args)
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/* Define a kvm_<syscall>() API to assert success. */
+#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ int r; \
+ \
+ r = name(UNPACK_ARGS(nr_args, args)); \
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \
+}
+
+/*
+ * Macro to define syscall APIs, either because KVM selftests doesn't link to
+ * the standard library, e.g. libnuma, or because there is no library that yet
+ * provides the syscall. These
+ */
+#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \
+static inline long name(DECLARE_ARGS(nr_args, args)) \
+{ \
+ return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \
+} \
+__KVM_SYSCALL_DEFINE(name, nr_args, args)
+
+/*
+ * Special case mmap(), as KVM selftest rarely/never specific an address,
+ * rarely specify an offset, and because the unique return code requires
+ * special handling anyways.
+ */
+static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd,
+ off_t offset)
+{
+ void *mem;
+
+ mem = mmap(NULL, size, prot, flags, fd, offset);
+ TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()",
+ (int)(unsigned long)MAP_FAILED));
+ return mem;
+}
+
+static inline void *kvm_mmap(size_t size, int prot, int flags, int fd)
+{
+ return __kvm_mmap(size, prot, flags, fd, 0);
+}
+
+static inline int kvm_dup(int fd)
+{
+ int new_fd = dup(fd);
+
+ TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd));
+ return new_fd;
+}
+
+__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
+__KVM_SYSCALL_DEFINE(close, 1, int, fd);
+__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
+__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+__KVM_SYSCALL_DEFINE(madvise, 3, void *, addr, size_t, length, int, advice);
+
+#endif /* SELFTEST_KVM_SYSCALLS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 4c4e5a847f67..2ecaaa0e9965 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -18,8 +18,12 @@
#include <asm/atomic.h>
#include <asm/kvm.h>
+#include <sys/eventfd.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include "kvm_syscalls.h"
#include "kvm_util_arch.h"
#include "kvm_util_types.h"
#include "sparsebit.h"
@@ -46,18 +50,28 @@ struct userspace_mem_region {
struct hlist_node slot_node;
};
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
struct kvm_vcpu {
struct list_head list;
- uint32_t id;
+ u32 id;
int fd;
struct kvm_vm *vm;
struct kvm_run *run;
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+#ifdef __aarch64__
+ struct kvm_vcpu_init init;
+#endif
+ struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
- uint32_t fetch_index;
- uint32_t dirty_gfns_count;
+ u32 fetch_index;
+ u32 dirty_gfns_count;
};
struct userspace_mem_regions {
@@ -74,42 +88,51 @@ enum kvm_mem_region_type {
NR_MEM_REGIONS,
};
+struct kvm_mmu {
+ bool pgd_created;
+ u64 pgd;
+ int pgtable_levels;
+
+ struct kvm_mmu_arch arch;
+};
+
struct kvm_vm {
int mode;
unsigned long type;
int kvm_fd;
int fd;
- unsigned int pgtable_levels;
unsigned int page_size;
unsigned int page_shift;
unsigned int pa_bits;
unsigned int va_bits;
- uint64_t max_gfn;
+ u64 max_gfn;
struct list_head vcpus;
struct userspace_mem_regions regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
- bool pgd_created;
- vm_paddr_t ucall_mmio_addr;
- vm_paddr_t pgd;
- vm_vaddr_t handlers;
- uint32_t dirty_ring_size;
- uint64_t gpa_tag_mask;
+ gpa_t ucall_mmio_addr;
+ gva_t handlers;
+ u32 dirty_ring_size;
+ gpa_t gpa_tag_mask;
+
+ /*
+ * "mmu" is the guest's stage-1, with a short name because the vast
+ * majority of tests only care about the stage-1 MMU.
+ */
+ struct kvm_mmu mmu;
+ struct kvm_mmu stage2_mmu;
struct kvm_vm_arch arch;
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
+ struct kvm_binary_stats stats;
/*
* KVM region slots. These are the default memslots used by page
* allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
* memslot.
*/
- uint32_t memslots[NR_MEM_REGIONS];
+ u32 memslots[NR_MEM_REGIONS];
};
struct vcpu_reg_sublist {
@@ -141,7 +164,7 @@ struct vcpu_reg_list {
else
struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
+memslot2region(struct kvm_vm *vm, u32 memslot);
static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
enum kvm_mem_region_type type)
@@ -167,24 +190,36 @@ enum vm_guest_mode {
VM_MODE_P40V48_4K,
VM_MODE_P40V48_16K,
VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */
VM_MODE_P47V64_4K,
VM_MODE_P44V64_4K,
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
+
+ VM_MODE_P56V57_4K, /* For riscv64 */
+ VM_MODE_P56V48_4K,
+ VM_MODE_P56V39_4K,
+ VM_MODE_P50V57_4K,
+ VM_MODE_P50V48_4K,
+ VM_MODE_P50V39_4K,
+ VM_MODE_P41V57_4K,
+ VM_MODE_P41V48_4K,
+ VM_MODE_P41V39_4K,
+
NUM_VM_MODES,
};
struct vm_shape {
- uint32_t type;
- uint8_t mode;
- uint8_t pad0;
- uint16_t pad1;
+ u32 type;
+ u8 mode;
+ u8 pad0;
+ u16 pad1;
};
-kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(u64));
#define VM_TYPE_DEFAULT 0
@@ -198,17 +233,17 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
shape; \
})
-#if defined(__aarch64__)
-
extern enum vm_guest_mode vm_mode_default;
+#if defined(__aarch64__)
+
#define VM_MODE_DEFAULT vm_mode_default
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -224,7 +259,12 @@ extern enum vm_guest_mode vm_mode_default;
#error "RISC-V 32-bit kvm selftests not supported"
#endif
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
@@ -243,16 +283,22 @@ struct vm_guest_mode_params {
};
extern const struct vm_guest_mode_params vm_guest_mode_params[];
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help);
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
-bool get_kvm_param_bool(const char *param);
-bool get_kvm_intel_param_bool(const char *param);
-bool get_kvm_amd_param_bool(const char *param);
+int kvm_get_module_param_integer(const char *module_name, const char *param);
+bool kvm_get_module_param_bool(const char *module_name, const char *param);
-int get_kvm_param_integer(const char *param);
-int get_kvm_intel_param_integer(const char *param);
-int get_kvm_amd_param_integer(const char *param);
+static inline bool get_kvm_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm", param);
+}
+
+static inline int get_kvm_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm", param);
+}
unsigned int kvm_check_cap(long cap);
@@ -261,9 +307,6 @@ static inline bool kvm_has_cap(long cap)
return kvm_check_cap(cap);
}
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
/*
* Use the "inner", double-underscore macro when reporting errors from within
* other macros so that the name of ioctl() and not its literal numeric value
@@ -361,21 +404,22 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap)
return ret;
}
-static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+static inline int __vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0)
{
struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
}
-static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+
+static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0)
{
struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
}
-static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size, uint64_t attributes)
+static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa,
+ u64 size, u64 attributes)
{
struct kvm_memory_attributes attr = {
.attributes = attributes,
@@ -395,35 +439,35 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
}
-static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
+static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa,
+ u64 size)
{
vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
}
-static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
+static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa,
+ u64 size)
{
vm_set_memory_attributes(vm, gpa, size, 0);
}
-void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
+void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size,
bool punch_hole);
-static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
+static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, gpa_t gpa,
+ u64 size)
{
vm_guest_mem_fallocate(vm, gpa, size, true);
}
-static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
+static inline void vm_guest_mem_allocate(struct kvm_vm *vm, gpa_t gpa,
+ u64 size)
{
vm_guest_mem_fallocate(vm, gpa, size, false);
}
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
+void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size);
+const char *vm_guest_mode_string(u32 i);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp);
@@ -431,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp);
void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
int kvm_memfd_alloc(size_t size, bool hugepages);
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent);
static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
{
@@ -441,7 +485,7 @@ static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
}
static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
+ u64 first_page, u32 num_pages)
{
struct kvm_clear_dirty_log args = {
.dirty_bitmap = log,
@@ -453,14 +497,14 @@ static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log
vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
}
-static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+static inline u32 kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
{
return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
}
static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm,
- uint64_t address,
- uint64_t size, bool pio)
+ u64 address,
+ u64 size, bool pio)
{
struct kvm_coalesced_mmio_zone zone = {
.addr = address,
@@ -472,8 +516,8 @@ static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm,
}
static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm,
- uint64_t address,
- uint64_t size, bool pio)
+ u64 address,
+ u64 size, bool pio)
{
struct kvm_coalesced_mmio_zone zone = {
.addr = address,
@@ -492,6 +536,44 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
return fd;
}
+static inline int __kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd,
+ u32 flags)
+{
+ struct kvm_irqfd irqfd = {
+ .fd = eventfd,
+ .gsi = gsi,
+ .flags = flags,
+ .resamplefd = -1,
+ };
+
+ return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd, u32 flags)
+{
+ int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd)
+{
+ kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+ int fd = eventfd(0, 0);
+
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+ return fd;
+}
+
static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
{
ssize_t ret;
@@ -528,24 +610,62 @@ static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc
}
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
- struct kvm_stats_desc *desc, uint64_t *data,
+ struct kvm_stats_desc *desc, u64 *data,
size_t max_elements);
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ u64 *data, size_t max_elements);
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+#define __get_stat(stats, stat) \
+({ \
+ u64 data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
+
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_smt_on(void)
{
- uint64_t data;
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
+ return false;
}
void vm_create_irqchip(struct kvm_vm *vm);
-static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, u64 size,
+ u64 flags)
{
struct kvm_create_guest_memfd guest_memfd = {
.size = size,
@@ -555,8 +675,8 @@ static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
}
-static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, u64 size,
+ u64 flags)
{
int fd = __vm_create_guest_memfd(vm, size, flags);
@@ -564,24 +684,23 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
return fd;
}
-void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
-int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
+void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva);
+void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva,
+ u32 guest_memfd, u64 guest_memfd_offset);
+int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva,
+ u32 guest_memfd, u64 guest_memfd_offset);
void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
+ enum vm_mem_backing_src_type src_type,
+ gpa_t gpa, u32 slot, u64 npages, u32 flags);
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+ gpa_t gpa, u32 slot, u64 npages, u32 flags,
+ int guest_memfd_fd, u64 guest_memfd_offset);
#ifndef vm_arch_has_protected_memory
static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
@@ -590,35 +709,34 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
}
#endif
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
-void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
-vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags);
+void vm_mem_region_reload(struct kvm_vm *vm, u32 slot);
+void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, u32 slot);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id);
+void vm_populate_gva_bitmap(struct kvm_vm *vm);
+gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva);
+gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva);
+gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva,
+ enum kvm_mem_region_type type);
+gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva,
+ enum kvm_mem_region_type type);
+gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages);
+gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type);
+gva_t vm_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa,
unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, gva_t gva);
+gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa);
#ifndef vcpu_arch_put_guest
#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0)
#endif
-static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+static inline gpa_t vm_untag_gpa(struct kvm_vm *vm, gpa_t gpa)
{
return gpa & ~vm->gpa_tag_mask;
}
@@ -634,8 +752,8 @@ static inline int __vcpu_run(struct kvm_vcpu *vcpu)
void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
-static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
- uint64_t arg0)
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, u32 cap,
+ u64 arg0)
{
struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
@@ -690,31 +808,34 @@ static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
}
-static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id, void *addr)
{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+ struct kvm_one_reg reg = { .id = id, .addr = (u64)addr };
return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
}
-static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val)
{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+ struct kvm_one_reg reg = { .id = id, .addr = (u64)&val };
return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
}
-static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id)
+
+static inline u64 vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id)
{
- uint64_t val;
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+ u64 val;
+ struct kvm_one_reg reg = { .id = id, .addr = (u64)&val };
TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
return val;
}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val)
{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+ struct kvm_one_reg reg = { .id = id, .addr = (u64)&val };
TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
@@ -759,75 +880,75 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
return fd;
}
-int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr);
-static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+static inline void kvm_has_device_attr(int dev_fd, u32 group, u64 attr)
{
int ret = __kvm_has_device_attr(dev_fd, group, attr);
TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
}
-int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val);
-static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
+static inline void kvm_device_attr_get(int dev_fd, u32 group,
+ u64 attr, void *val)
{
int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
}
-int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val);
-static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
+static inline void kvm_device_attr_set(int dev_fd, u32 group,
+ u64 attr, void *val)
{
int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
}
-static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr)
{
return __kvm_has_device_attr(vcpu->fd, group, attr);
}
-static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr)
{
kvm_has_device_attr(vcpu->fd, group, attr);
}
-static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr, void *val)
{
return __kvm_device_attr_get(vcpu->fd, group, attr, val);
}
-static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr, void *val)
{
kvm_device_attr_get(vcpu->fd, group, attr, val);
}
-static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr, void *val)
{
return __kvm_device_attr_set(vcpu->fd, group, attr, val);
}
-static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group,
+ u64 attr, void *val)
{
kvm_device_attr_set(vcpu->fd, group, attr, val);
}
-int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
-int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_test_create_device(struct kvm_vm *vm, u64 type);
+int __kvm_create_device(struct kvm_vm *vm, u64 type);
-static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+static inline int kvm_create_device(struct kvm_vm *vm, u64 type)
{
int fd = __kvm_create_device(vm, type);
@@ -841,9 +962,9 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
* VM VCPU Args Set
*
* Input Args:
- * vm - Virtual Machine
+ * vcpu - vCPU
* num - number of arguments
- * ... - arguments, each of type uint64_t
+ * ... - arguments, each of type u64
*
* Output Args: None
*
@@ -851,40 +972,38 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
*
* Sets the first @num input parameters for the function at @vcpu's entry point,
* per the C calling convention of the architecture, to the values given as
- * variable args. Each of the variable args is expected to be of type uint64_t.
+ * variable args. Each of the variable args is expected to be of type u64.
* The maximum @num can be is specific to the architecture.
*/
void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
-void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level);
#define KVM_MAX_IRQ_ROUTES 4096
struct kvm_irq_routing *kvm_gsi_routing_create(void);
void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
- uint32_t gsi, uint32_t pin);
+ u32 gsi, u32 pin);
int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
const char *exit_reason_str(unsigned int exit_reason);
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot,
- bool protected);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot);
+gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, gpa_t min_gpa,
+ u32 memslot, bool protected);
+gpa_t vm_alloc_page_table(struct kvm_vm *vm);
-static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
+static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ gpa_t min_gpa, u32 memslot)
{
/*
* By default, allocate memory as protected for VMs that support
* protected memory, as the majority of memory for such VMs is
* protected, i.e. using shared memory is effectively opt-in.
*/
- return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+ return __vm_phy_pages_alloc(vm, num, min_gpa, memslot,
vm_arch_has_protected_memory(vm));
}
@@ -895,8 +1014,8 @@ static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
* calculate the amount of memory needed for per-vCPU data, e.g. stacks.
*/
struct kvm_vm *____vm_create(struct vm_shape shape);
-struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
- uint64_t nr_extra_pages);
+struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus,
+ u64 nr_extra_pages);
static inline struct kvm_vm *vm_create_barebones(void)
{
@@ -913,16 +1032,16 @@ static inline struct kvm_vm *vm_create_barebones_type(unsigned long type)
return ____vm_create(shape);
}
-static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+static inline struct kvm_vm *vm_create(u32 nr_runnable_vcpus)
{
return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
}
-struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
- uint64_t extra_mem_pages,
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus,
+ u64 extra_mem_pages,
void *guest_code, struct kvm_vcpu *vcpus[]);
-static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+static inline struct kvm_vm *vm_create_with_vcpus(u32 nr_vcpus,
void *guest_code,
struct kvm_vcpu *vcpus[])
{
@@ -933,7 +1052,7 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
+ u64 extra_mem_pages,
void *guest_code);
/*
@@ -941,7 +1060,7 @@ struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
* additional pages of guest memory. Returns the VM and vCPU (via out param).
*/
static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
+ u64 extra_mem_pages,
void *guest_code)
{
return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
@@ -963,9 +1082,38 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_set_files_rlimit(u32 nr_vcpus);
+
+int __pin_task_to_cpu(pthread_t task, int cpu);
+
+static inline void pin_task_to_cpu(pthread_t task, int cpu)
+{
+ int r;
+
+ r = __pin_task_to_cpu(task, cpu);
+ TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu);
+}
+
+static inline int pin_task_to_any_cpu(pthread_t task)
+{
+ int cpu = sched_getcpu();
+
+ pin_task_to_cpu(task, cpu);
+ return cpu;
+}
+
+static inline void pin_self_to_cpu(int cpu)
+{
+ pin_task_to_cpu(pthread_self(), cpu);
+}
+
+static inline int pin_self_to_any_cpu(void)
+{
+ return pin_task_to_any_cpu(pthread_self());
+}
+
void kvm_print_vcpu_pinning_help(void);
-void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[],
int nr_vcpus);
unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
@@ -977,20 +1125,16 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
{
unsigned int n;
n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
return n;
}
#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \
memcpy(_p, &(g), sizeof(g)); \
})
#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \
memcpy(&(g), _p, sizeof(g)); \
})
@@ -1001,7 +1145,7 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
* undesirable to change the host's copy of the global.
*/
#define write_guest_global(vm, g, val) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \
typeof(g) _val = val; \
\
memcpy(_p, &(_val), sizeof(g)); \
@@ -1010,10 +1154,10 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent);
+ u8 indent);
static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent)
+ u8 indent)
{
vcpu_arch_dump(stream, vcpu, indent);
}
@@ -1025,10 +1169,10 @@ static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
* vm - Virtual Machine
* vcpu_id - The id of the VCPU to add to the VM.
*/
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id);
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
-static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id,
void *guest_code)
{
struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
@@ -1039,10 +1183,10 @@ static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
}
/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id);
static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
- uint32_t vcpu_id)
+ u32 vcpu_id)
{
return vm_arch_vcpu_recreate(vm, vcpu_id);
}
@@ -1057,26 +1201,15 @@ static inline void virt_pgd_alloc(struct kvm_vm *vm)
}
/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
* Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
+ * at @gva to the page starting at @gpa.
*/
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa);
-static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+static inline void virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
{
- virt_arch_pg_map(vm, vaddr, paddr);
+ virt_arch_pg_map(vm, gva, gpa);
+ sparsebit_set(vm->vpages_mapped, gva >> vm->page_shift);
}
@@ -1095,9 +1228,9 @@ static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr
* Returns the VM physical address of the translated VM virtual
* address given by @gva.
*/
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva);
-static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+static inline gpa_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva)
{
return addr_arch_gva2gpa(vm, gva);
}
@@ -1117,9 +1250,9 @@ static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
* Dumps to the FILE stream given by @stream, the contents of all the
* virtual translation tables for the VM given by @vm.
*/
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent);
-static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
virt_arch_dump(stream, vm, indent);
}
@@ -1130,17 +1263,26 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
}
+static inline u64 vm_page_align(struct kvm_vm *vm, u64 v)
+{
+ return (v + vm->page_size - 1) & ~(vm->page_size - 1);
+}
+
/*
- * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * Arch hook that is invoked via a constructor, i.e. before executing main(),
* to allow for arch-specific setup that is common to all tests, e.g. computing
* the default guest "mode".
*/
void kvm_selftest_arch_init(void);
-void kvm_arch_vm_post_create(struct kvm_vm *vm);
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus);
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm);
+void kvm_arch_vm_release(struct kvm_vm *vm);
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa);
-bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+u32 guest_get_vcpuid(void);
-uint32_t guest_get_vcpuid(void);
+bool kvm_arch_has_default_irqchip(void);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
index ec787b97cf18..ed0087e31674 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_types.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -2,6 +2,8 @@
#ifndef SELFTEST_KVM_UTIL_TYPES_H
#define SELFTEST_KVM_UTIL_TYPES_H
+#include <linux/types.h>
+
/*
* Provide a version of static_assert() that is guaranteed to have an optional
* message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE
@@ -14,7 +16,9 @@
#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+typedef u64 gpa_t; /* Virtual Machine (Guest) physical address */
+typedef u64 gva_t; /* Virtual Machine (Guest) virtual address */
+
+#define INVALID_GPA (~(u64)0)
#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
new file mode 100644
index 000000000000..3888aeeb3524
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch Constant Timer specific interface
+ */
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+/* LoongArch timer frequency is constant 100MHZ */
+#define TIMER_FREQ (100UL << 20)
+#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000)
+#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000)
+#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ)
+
+static inline unsigned long timer_get_cycles(void)
+{
+ unsigned long val = 0;
+
+ __asm__ __volatile__(
+ "rdtime.d %0, $zero\n\t"
+ : "=r"(val)
+ :
+ );
+
+ return val;
+}
+
+static inline unsigned long timer_get_cfg(void)
+{
+ return csr_read(LOONGARCH_CSR_TCFG);
+}
+
+static inline unsigned long timer_get_val(void)
+{
+ return csr_read(LOONGARCH_CSR_TVAL);
+}
+
+static inline void disable_timer(void)
+{
+ csr_write(0, LOONGARCH_CSR_TCFG);
+}
+
+static inline void timer_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_TIMER;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void timer_set_next_cmp_ms(unsigned int msec, bool period)
+{
+ unsigned long val;
+
+ val = msec_to_cycles(msec) & CSR_TCFG_VAL;
+ val |= CSR_TCFG_EN;
+ if (period)
+ val |= CSR_TCFG_PERIOD;
+ csr_write(val, LOONGARCH_CSR_TCFG);
+}
+
+static inline void __delay(u64 cycles)
+{
+ u64 start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..d5095900e442
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_mmu_arch {};
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/pmu.h b/tools/testing/selftests/kvm/include/loongarch/pmu.h
new file mode 100644
index 000000000000..478e6a9bbb2b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/pmu.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * LoongArch PMU specific interface
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include "processor.h"
+
+#define LOONGARCH_CPUCFG6 0x6
+#define CPUCFG6_PMP BIT(0)
+#define CPUCFG6_PAMVER GENMASK(3, 1)
+#define CPUCFG6_PMNUM GENMASK(7, 4)
+#define CPUCFG6_PMNUM_SHIFT 4
+#define CPUCFG6_PMBITS GENMASK(13, 8)
+#define CPUCFG6_PMBITS_SHIFT 8
+#define CPUCFG6_UPM BIT(14)
+
+/* Performance Counter registers */
+#define LOONGARCH_CSR_PERFCTRL0 0x200 /* perf event 0 config */
+#define LOONGARCH_CSR_PERFCNTR0 0x201 /* perf event 0 count value */
+#define LOONGARCH_CSR_PERFCTRL1 0x202 /* perf event 1 config */
+#define LOONGARCH_CSR_PERFCNTR1 0x203 /* perf event 1 count value */
+#define LOONGARCH_CSR_PERFCTRL2 0x204 /* perf event 2 config */
+#define LOONGARCH_CSR_PERFCNTR2 0x205 /* perf event 2 count value */
+#define LOONGARCH_CSR_PERFCTRL3 0x206 /* perf event 3 config */
+#define LOONGARCH_CSR_PERFCNTR3 0x207 /* perf event 3 count value */
+#define CSR_PERFCTRL_PLV0 BIT(16)
+#define CSR_PERFCTRL_PLV1 BIT(17)
+#define CSR_PERFCTRL_PLV2 BIT(18)
+#define CSR_PERFCTRL_PLV3 BIT(19)
+#define CSR_PERFCTRL_PMIE BIT(20)
+#define PMU_ENVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3)
+
+/* Hardware event codes (from LoongArch perf_event.c */
+#define LOONGARCH_PMU_EVENT_CYCLES 0x00 /* CPU cycles */
+#define LOONGARCH_PMU_EVENT_INSTR_RETIRED 0x01 /* Instructions retired */
+#define PERF_COUNT_HW_BRANCH_INSTRUCTIONS 0x02 /* Branch instructions */
+#define PERF_COUNT_HW_BRANCH_MISSES 0x03 /* Branch misses */
+
+#define NUM_LOOPS 1000
+#define EXPECTED_INSTR_MIN (NUM_LOOPS + 10) /* Loop + overhead */
+#define EXPECTED_CYCLES_MIN NUM_LOOPS /* At least 1 cycle per iteration */
+#define UPPER_BOUND (10 * NUM_LOOPS)
+
+#define PMU_OVERFLOW (1ULL << 63)
+
+static inline void pmu_irq_enable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val |= ECFGF_PMU;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+static inline void pmu_irq_disable(void)
+{
+ unsigned long val;
+
+ val = csr_read(LOONGARCH_CSR_ECFG);
+ val &= ~ECFGF_PMU;
+ csr_write(val, LOONGARCH_CSR_ECFG);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..93dc1fbd2e79
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define ECFGB_PMU 10
+#define ECFGF_PMU (BIT_ULL(ECFGB_PMU))
+#define ECFGB_TIMER 11
+#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER))
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define CSR_ESTAT_EXC_SHIFT 16
+#define CSR_ESTAT_EXC_WIDTH 6
+#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT)
+#define EXCCODE_INT 0 /* Interrupt */
+#define INT_PMI 10 /* PMU interrupt */
+#define INT_TI 11 /* Timer interrupt*/
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2))
+#define CSR_TCFG_PERIOD_SHIFT 1
+#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT)
+#define CSR_TCFG_EN (0x1UL)
+#define LOONGARCH_CSR_TVAL 0x42
+#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */
+#define CSR_TINTCLR_TI_SHIFT 0
+#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT)
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define read_cpucfg(reg) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "cpucfg %0, %1\n\t" \
+ : "=r" (__v) \
+ : "r" (reg) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__( \
+ "csrrd %[val], %[reg]\n\t" \
+ : [val] "=r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define csr_write(v, csr) \
+({ \
+ register unsigned long __v = v; \
+ __asm__ __volatile__ ( \
+ "csrwr %[val], %[reg]\n\t" \
+ : [val] "+r" (__v) \
+ : [reg] "i" (csr) \
+ : "memory"); \
+ __v; \
+})
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+ unsigned long prmd;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#define VECTOR_NUM 64
+
+typedef void(*handler_fn)(struct ex_regs *);
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM];
+};
+
+void loongarch_vcpu_setup(struct kvm_vcpu *vcpu);
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler);
+
+static inline void cpu_relax(void)
+{
+ asm volatile("nop" ::: "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+ unsigned int flags = CSR_CRMD_IE;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ unsigned int flags = 0;
+ register unsigned int mask asm("$t0") = CSR_CRMD_IE;
+
+ __asm__ __volatile__(
+ "csrxchg %[val], %[mask], %[reg]\n\t"
+ : [val] "+r" (flags)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : "memory");
+}
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..2210d3d94c40
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern gva_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(gva_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index 9071eb6dea60..0d1d6230cc05 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -20,9 +20,9 @@
#define MEMSTRESS_MEM_SLOT_INDEX 1
struct memstress_vcpu_args {
- uint64_t gpa;
- uint64_t gva;
- uint64_t pages;
+ gpa_t gpa;
+ gva_t gva;
+ u64 pages;
/* Only used by the host userspace part of the vCPU thread */
struct kvm_vcpu *vcpu;
@@ -32,11 +32,11 @@ struct memstress_vcpu_args {
struct memstress_args {
struct kvm_vm *vm;
/* The starting address and size of the guest test region. */
- uint64_t gpa;
- uint64_t size;
- uint64_t guest_page_size;
- uint32_t random_seed;
- uint32_t write_percent;
+ gpa_t gpa;
+ u64 size;
+ u64 guest_page_size;
+ u32 random_seed;
+ u32 write_percent;
/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
bool nested;
@@ -45,7 +45,7 @@ struct memstress_args {
/* True if all vCPUs are pinned to pCPUs */
bool pin_vcpus;
/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
- uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
+ u32 vcpu_to_pcpu[KVM_MAX_VCPUS];
/* Test is done, stop running vCPUs. */
bool stop_vcpus;
@@ -56,27 +56,27 @@ struct memstress_args {
extern struct memstress_args memstress_args;
struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
- uint64_t vcpu_memory_bytes, int slots,
+ u64 vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
bool partition_vcpu_memory_access);
void memstress_destroy_vm(struct kvm_vm *vm);
-void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
+void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent);
void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
void memstress_join_vcpu_threads(int vcpus);
-void memstress_guest_code(uint32_t vcpu_id);
+void memstress_guest_code(u32 vcpu_id);
-uint64_t memstress_nested_pages(int nr_vcpus);
+u64 memstress_nested_pages(int nr_vcpus);
void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots);
void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots);
void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots);
void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
- int slots, uint64_t pages_per_slot);
-unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot);
+ int slots, u64 pages_per_slot);
+unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot);
void memstress_free_bitmaps(unsigned long *bitmaps[], int slots);
#endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
index b020547403fd..29572a6d789c 100644
--- a/tools/testing/selftests/kvm/include/numaif.h
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -1,55 +1,83 @@
/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/numaif.h
- *
- * Copyright (C) 2020, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Header file that provides access to NUMA API functions not explicitly
- * exported to user space.
- */
+/* Copyright (C) 2020, Google LLC. */
#ifndef SELFTEST_KVM_NUMAIF_H
#define SELFTEST_KVM_NUMAIF_H
-#define __NR_get_mempolicy 239
-#define __NR_migrate_pages 256
+#include <dirent.h>
-/* System calls */
-long get_mempolicy(int *policy, const unsigned long *nmask,
- unsigned long maxnode, void *addr, int flags)
+#include <linux/mempolicy.h>
+
+#include "kvm_syscalls.h"
+
+KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask,
+ unsigned long, maxnode, void *, addr, int, flags);
+
+KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask,
+ unsigned long, maxnode);
+
+KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start,
+ unsigned long, len, unsigned long, home_node,
+ unsigned long, flags);
+
+KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode,
+ const unsigned long *, frommask, const unsigned long *, tomask);
+
+KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages,
+ const int *, nodes, int *, status, int, flags);
+
+KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode,
+ const unsigned long *, nodemask, unsigned long, maxnode,
+ unsigned int, flags);
+
+static inline int get_max_numa_node(void)
{
- return syscall(__NR_get_mempolicy, policy, nmask,
- maxnode, addr, flags);
+ struct dirent *de;
+ int max_node = 0;
+ DIR *d;
+
+ /*
+ * Assume there's a single node if the kernel doesn't support NUMA,
+ * or if no nodes are found.
+ */
+ d = opendir("/sys/devices/system/node");
+ if (!d)
+ return 0;
+
+ while ((de = readdir(d)) != NULL) {
+ int node_id;
+ char *endptr;
+
+ if (strncmp(de->d_name, "node", 4) != 0)
+ continue;
+
+ node_id = strtol(de->d_name + 4, &endptr, 10);
+ if (*endptr != '\0')
+ continue;
+
+ if (node_id > max_node)
+ max_node = node_id;
+ }
+ closedir(d);
+
+ return max_node;
}
-long migrate_pages(int pid, unsigned long maxnode,
- const unsigned long *frommask,
- const unsigned long *tomask)
+static bool is_numa_available(void)
{
- return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+ /*
+ * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall
+ * fails with ENOSYS, then the kernel was built without NUMA support.
+ * if the syscall fails with EPERM, then the process/user lacks the
+ * necessary capabilities (CAP_SYS_NICE).
+ */
+ return !get_mempolicy(NULL, NULL, 0, NULL, 0) ||
+ (errno != ENOSYS && errno != EPERM);
}
-/* Policies */
-#define MPOL_DEFAULT 0
-#define MPOL_PREFERRED 1
-#define MPOL_BIND 2
-#define MPOL_INTERLEAVE 3
-
-#define MPOL_MAX MPOL_INTERLEAVE
-
-/* Flags for get_mem_policy */
-#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
- /* Warning: MPOL_F_NODE is unsupported and
- * subject to change. Don't use.
- */
-#define MPOL_F_ADDR (1<<1) /* look up vma using address */
-#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
-
-/* Flags for mbind */
-#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
-#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
-#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+static inline bool is_multi_numa_node_system(void)
+{
+ return is_numa_available() && get_max_numa_node() >= 1;
+}
#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
index 225d81dad064..28ffc014da2a 100644
--- a/tools/testing/selftests/kvm/include/riscv/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
@@ -14,25 +14,25 @@
static unsigned long timer_freq;
#define msec_to_cycles(msec) \
- ((timer_freq) * (uint64_t)(msec) / 1000)
+ ((timer_freq) * (u64)(msec) / 1000)
#define usec_to_cycles(usec) \
- ((timer_freq) * (uint64_t)(usec) / 1000000)
+ ((timer_freq) * (u64)(usec) / 1000000)
#define cycles_to_usec(cycles) \
- ((uint64_t)(cycles) * 1000000 / (timer_freq))
+ ((u64)(cycles) * 1000000 / (timer_freq))
-static inline uint64_t timer_get_cycles(void)
+static inline u64 timer_get_cycles(void)
{
return csr_read(CSR_TIME);
}
-static inline void timer_set_cmp(uint64_t cval)
+static inline void timer_set_cmp(u64 cval)
{
csr_write(CSR_STIMECMP, cval);
}
-static inline uint64_t timer_get_cmp(void)
+static inline u64 timer_get_cmp(void)
{
return csr_read(CSR_STIMECMP);
}
@@ -47,17 +47,17 @@ static inline void timer_irq_disable(void)
csr_clear(CSR_SIE, IE_TIE);
}
-static inline void timer_set_next_cmp_ms(uint32_t msec)
+static inline void timer_set_next_cmp_ms(u32 msec)
{
- uint64_t now_ct = timer_get_cycles();
- uint64_t next_ct = now_ct + msec_to_cycles(msec);
+ u64 now_ct = timer_get_cycles();
+ u64 next_ct = now_ct + msec_to_cycles(msec);
timer_set_cmp(next_ct);
}
-static inline void __delay(uint64_t cycles)
+static inline void __delay(u64 cycles)
{
- uint64_t start = timer_get_cycles();
+ u64 start = timer_get_cycles();
while ((timer_get_cycles() - start) < cycles)
cpu_relax();
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 5f389166338c..e3acf2ae9881 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -9,10 +9,23 @@
#include <linux/stringify.h>
#include <asm/csr.h>
+#include <asm/vdso/processor.h>
#include "kvm_util.h"
-static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
- uint64_t idx, uint64_t size)
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
+static inline u64 __kvm_reg_id(u64 type, u64 subtype, u64 idx, u64 size)
{
return KVM_REG_RISCV | type | subtype | idx | size;
}
@@ -48,19 +61,20 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
KVM_REG_RISCV_SBI_SINGLE, \
idx, KVM_REG_SIZE_ULONG)
-bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext);
-static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
+static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, u64 isa_ext)
{
return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
}
-static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
+static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, u64 sbi_ext)
{
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
-struct ex_regs {
+struct pt_regs {
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -92,16 +106,19 @@ struct ex_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- unsigned long epc;
+ /* Supervisor/Machine CSRs */
unsigned long status;
+ unsigned long badaddr;
unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#define NR_VECTORS 2
#define NR_EXCEPTIONS 32
#define EC_MASK (NR_EXCEPTIONS - 1)
-typedef void(*exception_handler_fn)(struct ex_regs *);
+typedef void(*exception_handler_fn)(struct pt_regs *);
void vm_init_vector_tables(struct kvm_vm *vm);
void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
@@ -174,4 +191,6 @@ static inline void local_irq_disable(void)
csr_clear(CSR_SSTATUS, SR_SIE);
}
+unsigned long riscv64_get_satp_mode(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
index 046b432ae896..16f1815ac48f 100644
--- a/tools/testing/selftests/kvm/include/riscv/sbi.h
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_GENERAL_MAX,
};
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RCVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RCVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
/* SBI PMU counter types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
index a695ae36f3e0..2de7c6a36096 100644
--- a/tools/testing/selftests/kvm/include/riscv/ucall.h
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -7,11 +7,11 @@
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa)
{
}
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+static inline void ucall_arch_do_ucall(gva_t uc)
{
sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
KVM_RISCV_SELFTESTS_SBI_UCALL,
diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
index b0ed71302722..6deaf18fec22 100644
--- a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
+++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
@@ -8,6 +8,6 @@
#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
#define SELFTEST_KVM_DIAG318_TEST_HANDLER
-uint64_t get_diag318_info(void);
+u64 get_diag318_info(void);
#endif
diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
index 00a1ced6538b..41a265742666 100644
--- a/tools/testing/selftests/kvm/include/s390/facility.h
+++ b/tools/testing/selftests/kvm/include/s390/facility.h
@@ -16,7 +16,7 @@
/* alt_stfle_fac_list[16] + stfle_fac_list[16] */
#define NB_STFL_DOUBLEWORDS 32
-extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+extern u64 stfl_doublewords[NB_STFL_DOUBLEWORDS];
extern bool stfle_flag;
static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
@@ -24,7 +24,7 @@ static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr)
return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-static inline void stfle(uint64_t *fac, unsigned int nb_doublewords)
+static inline void stfle(u64 *fac, unsigned int nb_doublewords)
{
register unsigned long r0 asm("0") = nb_doublewords - 1;
diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
index e43a57d99b56..d5095900e442 100644
--- a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
+struct kvm_mmu_arch {};
struct kvm_vm_arch {};
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
index 8035a872a351..3907d629304f 100644
--- a/tools/testing/selftests/kvm/include/s390/ucall.h
+++ b/tools/testing/selftests/kvm/include/s390/ucall.h
@@ -6,11 +6,11 @@
#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa)
{
}
-static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+static inline void ucall_arch_do_ucall(gva_t uc)
{
/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index bc760761e1a3..e027e5790946 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -6,7 +6,7 @@
*
* Header file that describes API to the sparsebit library.
* This library provides a memory efficient means of storing
- * the settings of bits indexed via a uint64_t. Memory usage
+ * the settings of bits indexed via a u64. Memory usage
* is reasonable, significantly less than (2^64 / 8) bytes, as
* long as bits that are mostly set or mostly cleared are close
* to each other. This library is efficient in memory usage
@@ -25,8 +25,8 @@ extern "C" {
#endif
struct sparsebit;
-typedef uint64_t sparsebit_idx_t;
-typedef uint64_t sparsebit_num_t;
+typedef u64 sparsebit_idx_t;
+typedef u64 sparsebit_num_t;
struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..a56271c237ae 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -8,6 +8,8 @@
#ifndef SELFTEST_KVM_TEST_UTIL_H
#define SELFTEST_KVM_TEST_UTIL_H
+#include <setjmp.h>
+#include <signal.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -17,12 +19,14 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
-#include <sys/mman.h>
#include "kselftest.h"
+#include <linux/mman.h>
+#include <linux/types.h>
+
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
@@ -78,27 +82,44 @@ do { \
__builtin_unreachable(); \
} while (0)
+extern sigjmp_buf expect_sigbus_jmpbuf;
+void expect_sigbus_handler(int signum);
+
+#define TEST_EXPECT_SIGBUS(action) \
+do { \
+ struct sigaction sa_old, sa_new = { \
+ .sa_handler = expect_sigbus_handler, \
+ }; \
+ \
+ sigaction(SIGBUS, &sa_new, &sa_old); \
+ if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \
+ action; \
+ TEST_FAIL("'%s' should have triggered SIGBUS", #action); \
+ } \
+ sigaction(SIGBUS, &sa_old, NULL); \
+} while (0)
+
size_t parse_size(const char *size);
-int64_t timespec_to_ns(struct timespec ts);
-struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
+s64 timespec_to_ns(struct timespec ts);
+struct timespec timespec_add_ns(struct timespec ts, s64 ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
struct timespec timespec_elapsed(struct timespec start);
struct timespec timespec_div(struct timespec ts, int divisor);
struct guest_random_state {
- uint32_t seed;
+ u32 seed;
};
-extern uint32_t guest_random_seed;
+extern u32 guest_random_seed;
extern struct guest_random_state guest_rng;
-struct guest_random_state new_guest_random_state(uint32_t seed);
-uint32_t guest_random_u32(struct guest_random_state *state);
+struct guest_random_state new_guest_random_state(u32 seed);
+u32 guest_random_u32(struct guest_random_state *state);
static inline bool __guest_random_bool(struct guest_random_state *state,
- uint8_t percent)
+ u8 percent)
{
return (guest_random_u32(state) % 100) < percent;
}
@@ -108,9 +129,9 @@ static inline bool guest_random_bool(struct guest_random_state *state)
return __guest_random_bool(state, 50);
}
-static inline uint64_t guest_random_u64(struct guest_random_state *state)
+static inline u64 guest_random_u64(struct guest_random_state *state)
{
- return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state);
+ return ((u64)guest_random_u32(state) << 32) | guest_random_u32(state);
}
enum vm_mem_backing_src_type {
@@ -139,7 +160,7 @@ enum vm_mem_backing_src_type {
struct vm_mem_backing_src_alias {
const char *name;
- uint32_t flag;
+ u32 flag;
};
#define MIN_RUN_DELAY_NS 200000UL
@@ -147,12 +168,13 @@ struct vm_mem_backing_src_alias {
bool thp_configured(void);
size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
-const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
-size_t get_backing_src_pagesz(uint32_t i);
-bool is_backing_src_hugetlb(uint32_t i);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i);
+size_t get_backing_src_pagesz(u32 i);
+bool is_backing_src_hugetlb(u32 i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
/*
* Whether or not the given source type is shared memory (as opposed to
@@ -169,18 +191,18 @@ static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
}
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static inline uint64_t align_up(uint64_t x, uint64_t size)
+static inline u64 align_up(u64 x, u64 size)
{
- uint64_t mask = size - 1;
+ u64 mask = size - 1;
TEST_ASSERT(size != 0 && !(size & (size - 1)),
"size not a power of 2: %lu", size);
return ((x + mask) & ~mask);
}
-static inline uint64_t align_down(uint64_t x, uint64_t size)
+static inline u64 align_down(u64 x, u64 size)
{
- uint64_t x_aligned_up = align_up(x, size);
+ u64 x_aligned_up = align_up(x, size);
if (x == x_aligned_up)
return x;
@@ -195,7 +217,7 @@ static inline void *align_ptr_up(void *x, size_t size)
int atoi_paranoid(const char *num_str);
-static inline uint32_t atoi_positive(const char *name, const char *num_str)
+static inline u32 atoi_positive(const char *name, const char *num_str)
{
int num = atoi_paranoid(num_str);
@@ -203,7 +225,7 @@ static inline uint32_t atoi_positive(const char *name, const char *num_str)
return num;
}
-static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
+static inline u32 atoi_non_negative(const char *name, const char *num_str)
{
int num = atoi_paranoid(num_str);
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
index 9b6edaafe6d4..b7d5d2c84701 100644
--- a/tools/testing/selftests/kvm/include/timer_test.h
+++ b/tools/testing/selftests/kvm/include/timer_test.h
@@ -18,21 +18,21 @@
/* Timer test cmdline parameters */
struct test_args {
- uint32_t nr_vcpus;
- uint32_t nr_iter;
- uint32_t timer_period_ms;
- uint32_t migration_freq_ms;
- uint32_t timer_err_margin_us;
+ u32 nr_vcpus;
+ u32 nr_iter;
+ u32 timer_period_ms;
+ u32 migration_freq_ms;
+ u32 timer_err_margin_us;
/* Members of struct kvm_arm_counter_offset */
- uint64_t counter_offset;
- uint64_t reserved;
+ u64 counter_offset;
+ u64 reserved;
};
/* Shared variables between host and guest */
struct test_vcpu_shared_data {
- uint32_t nr_iter;
+ u32 nr_iter;
int guest_stage;
- uint64_t xcnt;
+ u64 xcnt;
};
extern struct test_args test_args;
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index d9d6581b8d4f..cbdcb0a50c4f 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -21,26 +21,26 @@ enum {
#define UCALL_BUFFER_LEN 1024
struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
+ u64 cmd;
+ u64 args[UCALL_MAX_ARGS];
char buffer[UCALL_BUFFER_LEN];
/* Host virtual address of this struct. */
struct ucall *hva;
};
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
-void ucall_arch_do_ucall(vm_vaddr_t uc);
+void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa);
+void ucall_arch_do_ucall(gva_t uc);
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
-void ucall(uint64_t cmd, int nargs, ...);
-__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...);
-__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp,
+void ucall(u64 cmd, int nargs, ...);
+__printf(2, 3) void ucall_fmt(u64 cmd, const char *fmt, ...);
+__printf(5, 6) void ucall_assert(u64 cmd, const char *exp,
const char *file, unsigned int line,
const char *fmt, ...);
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
-void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
-int ucall_nr_pages_required(uint64_t page_size);
+u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa);
+int ucall_nr_pages_required(u64 page_size);
/*
* Perform userspace call without any associated data. This bare call avoids
@@ -48,7 +48,7 @@ int ucall_nr_pages_required(uint64_t page_size);
* the full ucall() are problematic and/or unwanted. Note, this will come out
* as UCALL_NONE on the backend.
*/
-#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL)
+#define GUEST_UCALL_NONE() ucall_arch_do_ucall((gva_t)NULL)
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
index 60f7f9d435dc..0bc1dc16600e 100644
--- a/tools/testing/selftests/kvm/include/userfaultfd_util.h
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -25,7 +25,7 @@ struct uffd_reader_args {
struct uffd_desc {
int uffd;
- uint64_t num_readers;
+ u64 num_readers;
/* Holds the write ends of the pipes for killing the readers. */
int *pipefds;
pthread_t *readers;
@@ -33,8 +33,8 @@ struct uffd_desc {
};
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
- void *hva, uint64_t len,
- uint64_t num_readers,
+ void *hva, u64 len,
+ u64 num_readers,
uffd_handler_t handler);
void uffd_stop_demand_paging(struct uffd_desc *uffd);
diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index 80fe9f69b38d..31887bdc3d6c 100644
--- a/tools/testing/selftests/kvm/include/x86/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -28,10 +28,13 @@
#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
#define APIC_TASKPRI 0x80
#define APIC_PROCPRI 0xA0
+#define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4)
+#define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) | (y << 4))
#define APIC_EOI 0xB0
#define APIC_SPIV 0xF0
#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ISR 0x100
#define APIC_IRR 0x200
#define APIC_ICR 0x300
#define APIC_LVTCMCI 0x2f0
@@ -67,47 +70,51 @@
#define APIC_TMICT 0x380
#define APIC_TMCCT 0x390
#define APIC_TDCR 0x3E0
+#define APIC_SELF_IPI 0x3F0
+
+#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32)
+#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10)
void apic_disable(void);
void xapic_enable(void);
void x2apic_enable(void);
-static inline uint32_t get_bsp_flag(void)
+static inline u32 get_bsp_flag(void)
{
return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
}
-static inline uint32_t xapic_read_reg(unsigned int reg)
+static inline u32 xapic_read_reg(unsigned int reg)
{
- return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+ return ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2];
}
-static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+static inline void xapic_write_reg(unsigned int reg, u32 val)
{
- ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+ ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2] = val;
}
-static inline uint64_t x2apic_read_reg(unsigned int reg)
+static inline u64 x2apic_read_reg(unsigned int reg)
{
return rdmsr(APIC_BASE_MSR + (reg >> 4));
}
-static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value)
+static inline u8 x2apic_write_reg_safe(unsigned int reg, u64 value)
{
return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value);
}
-static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+static inline void x2apic_write_reg(unsigned int reg, u64 value)
{
- uint8_t fault = x2apic_write_reg_safe(reg, value);
+ u8 fault = x2apic_write_reg_safe(reg, value);
__GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n",
fault, APIC_BASE_MSR + (reg >> 4), value);
}
-static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value)
+static inline void x2apic_write_reg_fault(unsigned int reg, u64 value)
{
- uint8_t fault = x2apic_write_reg_safe(reg, value);
+ u8 fault = x2apic_write_reg_safe(reg, value);
__GUEST_ASSERT(fault == GP_VECTOR,
"Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n",
diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
index 5a74bb30e2f8..be79bda024bf 100644
--- a/tools/testing/selftests/kvm/include/x86/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86/evmcs.h
@@ -10,9 +10,9 @@
#include "hyperv.h"
#include "vmx.h"
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
+#define u16 u16
+#define u32 u32
+#define u64 u64
#define EVMCS_VERSION 1
@@ -245,7 +245,7 @@ static inline void evmcs_enable(void)
enable_evmcs = true;
}
-static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
+static inline int evmcs_vmptrld(u64 vmcs_pa, void *vmcs)
{
current_vp_assist->current_nested_vmcs = vmcs_pa;
current_vp_assist->enlighten_vmentry = 1;
@@ -265,7 +265,7 @@ static inline bool load_evmcs(struct hyperv_test_pages *hv)
return true;
}
-static inline int evmcs_vmptrst(uint64_t *value)
+static inline int evmcs_vmptrst(u64 *value)
{
*value = current_vp_assist->current_nested_vmcs &
~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
@@ -273,7 +273,7 @@ static inline int evmcs_vmptrst(uint64_t *value)
return 0;
}
-static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
+static inline int evmcs_vmread(u64 encoding, u64 *value)
{
switch (encoding) {
case GUEST_RIP:
@@ -672,7 +672,7 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
return 0;
}
-static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
+static inline int evmcs_vmwrite(u64 encoding, u64 value)
{
switch (encoding) {
case GUEST_RIP:
@@ -1226,9 +1226,9 @@ static inline int evmcs_vmlaunch(void)
"pop %%rbp;"
: [ret]"=&a"(ret)
: [host_rsp]"r"
- ((uint64_t)&current_evmcs->host_rsp),
+ ((u64)&current_evmcs->host_rsp),
[host_rip]"r"
- ((uint64_t)&current_evmcs->host_rip)
+ ((u64)&current_evmcs->host_rip)
: "memory", "cc", "rbx", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15");
return ret;
@@ -1265,9 +1265,9 @@ static inline int evmcs_vmresume(void)
"pop %%rbp;"
: [ret]"=&a"(ret)
: [host_rsp]"r"
- ((uint64_t)&current_evmcs->host_rsp),
+ ((u64)&current_evmcs->host_rsp),
[host_rip]"r"
- ((uint64_t)&current_evmcs->host_rip)
+ ((u64)&current_evmcs->host_rip)
: "memory", "cc", "rbx", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15");
return ret;
diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
index f13e532be240..78003f5a22f3 100644
--- a/tools/testing/selftests/kvm/include/x86/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86/hyperv.h
@@ -254,12 +254,12 @@
* Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
* is set to the hypercall status (if no exception occurred).
*/
-static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
- vm_vaddr_t output_address,
- uint64_t *hv_status)
+static inline u8 __hyperv_hypercall(u64 control, gva_t input_address,
+ gva_t output_address,
+ u64 *hv_status)
{
- uint64_t error_code;
- uint8_t vector;
+ u64 error_code;
+ u8 vector;
/* Note both the hypercall and the "asm safe" clobber r9-r11. */
asm volatile("mov %[output_address], %%r8\n\t"
@@ -274,11 +274,11 @@ static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
}
/* Issue a Hyper-V hypercall and assert that it succeeded. */
-static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
- vm_vaddr_t output_address)
+static inline void hyperv_hypercall(u64 control, gva_t input_address,
+ gva_t output_address)
{
- uint64_t hv_status;
- uint8_t vector;
+ u64 hv_status;
+ u8 vector;
vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
@@ -327,27 +327,27 @@ struct hv_vp_assist_page {
extern struct hv_vp_assist_page *current_vp_assist;
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+int enable_vp_assist(u64 vp_assist_pa, void *vp_assist);
struct hyperv_test_pages {
/* VP assist page */
void *vp_assist_hva;
- uint64_t vp_assist_gpa;
+ u64 vp_assist_gpa;
void *vp_assist;
/* Partition assist page */
void *partition_assist_hva;
- uint64_t partition_assist_gpa;
+ u64 partition_assist_gpa;
void *partition_assist;
/* Enlightened VMCS */
void *enlightened_vmcs_hva;
- uint64_t enlightened_vmcs_gpa;
+ u64 enlightened_vmcs_gpa;
void *enlightened_vmcs;
};
struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
- vm_vaddr_t *p_hv_pages_gva);
+ gva_t *p_hv_pages_gva);
/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 972bb1c4ab4c..c33ab6e04171 100644
--- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -10,13 +10,35 @@
extern bool is_forced_emulation_enabled;
+struct pte_masks {
+ u64 present;
+ u64 writable;
+ u64 user;
+ u64 readable;
+ u64 executable;
+ u64 accessed;
+ u64 dirty;
+ u64 huge;
+ u64 nx;
+ u64 c;
+ u64 s;
+
+ u64 always_set;
+};
+
+struct kvm_mmu_arch {
+ struct pte_masks pte_masks;
+};
+
+struct kvm_mmu;
+
struct kvm_vm_arch {
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
+ gva_t gdt;
+ gva_t tss;
+ gva_t idt;
- uint64_t c_bit;
- uint64_t s_bit;
+ u64 c_bit;
+ u64 s_bit;
int sev_fd;
bool is_pt_protected;
};
@@ -40,7 +62,7 @@ do { \
: "+m" (mem) \
: "r" (val) : "memory"); \
} else { \
- uint64_t __old = READ_ONCE(mem); \
+ u64 __old = READ_ONCE(mem); \
\
__asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
: [ptr] "+m" (mem), [old] "+a" (__old) \
diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 3c10c4dc0ae8..98537cc8840d 100644
--- a/tools/testing/selftests/kvm/include/x86/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
@@ -5,7 +5,10 @@
#ifndef SELFTEST_KVM_PMU_H
#define SELFTEST_KVM_PMU_H
-#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/types.h>
+#include <linux/bits.h>
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
@@ -61,6 +64,11 @@
#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02)
+#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00)
+#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01)
+#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02)
+#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01)
#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
@@ -80,6 +88,11 @@ enum intel_pmu_architectural_events {
INTEL_ARCH_BRANCHES_RETIRED_INDEX,
INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX,
+ INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX,
+ INTEL_ARCH_TOPDOWN_RETIRING_INDEX,
+ INTEL_ARCH_LBR_INSERTS_INDEX,
NR_INTEL_ARCH_EVENTS,
};
@@ -91,7 +104,21 @@ enum amd_pmu_zen_events {
NR_AMD_ZEN_EVENTS,
};
-extern const uint64_t intel_pmu_arch_events[];
-extern const uint64_t amd_pmu_zen_events[];
+extern const u64 intel_pmu_arch_events[];
+extern const u64 amd_pmu_zen_events[];
+
+enum pmu_errata {
+ INSTRUCTIONS_RETIRED_OVERCOUNT,
+ BRANCHES_RETIRED_OVERCOUNT,
+};
+
+extern u64 pmu_errata_mask;
+
+void kvm_init_pmu_errata(void);
+
+static inline bool this_pmu_has_errata(enum pmu_errata errata)
+{
+ return pmu_errata_mask & BIT_ULL(errata);
+}
#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index d60da8966772..77f576ee7789 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -21,7 +21,9 @@
extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
-extern uint64_t guest_tsc_khz;
+extern bool host_cpu_is_hygon;
+extern bool host_cpu_is_amd_compatible;
+extern u64 guest_tsc_khz;
#ifndef MAX_NR_CPUID_ENTRIES
#define MAX_NR_CPUID_ENTRIES 100
@@ -34,6 +36,8 @@ extern uint64_t guest_tsc_khz;
#define NMI_VECTOR 0x02
+const char *ex_str(int vector);
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -183,6 +187,9 @@ struct kvm_x86_cpu_feature {
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -196,9 +203,14 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
/*
* KVM defined paravirt features.
@@ -258,7 +270,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
-#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
@@ -285,6 +297,8 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -323,6 +337,11 @@ struct kvm_x86_pmu_feature {
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8)
+#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9)
+#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10)
+#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11)
+#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12)
#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
@@ -346,16 +365,6 @@ static inline unsigned int x86_model(unsigned int eax)
return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
-/* Page table bitfield declarations */
-#define PTE_PRESENT_MASK BIT_ULL(0)
-#define PTE_WRITABLE_MASK BIT_ULL(1)
-#define PTE_USER_MASK BIT_ULL(2)
-#define PTE_ACCESSED_MASK BIT_ULL(5)
-#define PTE_DIRTY_MASK BIT_ULL(6)
-#define PTE_LARGE_MASK BIT_ULL(7)
-#define PTE_GLOBAL_MASK BIT_ULL(8)
-#define PTE_NX_MASK BIT_ULL(63)
-
#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
#define PAGE_SHIFT 12
@@ -390,17 +399,17 @@ struct gpr64_regs {
};
struct desc64 {
- uint16_t limit0;
- uint16_t base0;
+ u16 limit0;
+ u16 base0;
unsigned base1:8, type:4, s:1, dpl:2, p:1;
unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
- uint32_t base3;
- uint32_t zero1;
+ u32 base3;
+ u32 zero1;
} __attribute__((packed));
struct desc_ptr {
- uint16_t size;
- uint64_t address;
+ u16 size;
+ u64 address;
} __attribute__((packed));
struct kvm_x86_state {
@@ -418,16 +427,18 @@ struct kvm_x86_state {
struct kvm_msrs msrs;
};
-static inline uint64_t get_desc64_base(const struct desc64 *desc)
+static inline u64 get_desc64_base(const struct desc64 *desc)
{
- return ((uint64_t)desc->base3 << 32) |
- (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+ return (u64)desc->base3 << 32 |
+ (u64)desc->base2 << 24 |
+ (u64)desc->base1 << 16 |
+ (u64)desc->base0;
}
-static inline uint64_t rdtsc(void)
+static inline u64 rdtsc(void)
{
- uint32_t eax, edx;
- uint64_t tsc_val;
+ u32 eax, edx;
+ u64 tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
* instructions have been executed. If software requires RDTSC to be
@@ -435,39 +446,39 @@ static inline uint64_t rdtsc(void)
* execute LFENCE immediately after RDTSC
*/
__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
- tsc_val = ((uint64_t)edx) << 32 | eax;
+ tsc_val = ((u64)edx) << 32 | eax;
return tsc_val;
}
-static inline uint64_t rdtscp(uint32_t *aux)
+static inline u64 rdtscp(u32 *aux)
{
- uint32_t eax, edx;
+ u32 eax, edx;
__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
- return ((uint64_t)edx) << 32 | eax;
+ return ((u64)edx) << 32 | eax;
}
-static inline uint64_t rdmsr(uint32_t msr)
+static inline u64 rdmsr(u32 msr)
{
- uint32_t a, d;
+ u32 a, d;
__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
- return a | ((uint64_t) d << 32);
+ return a | ((u64)d << 32);
}
-static inline void wrmsr(uint32_t msr, uint64_t value)
+static inline void wrmsr(u32 msr, u64 value)
{
- uint32_t a = value;
- uint32_t d = value >> 32;
+ u32 a = value;
+ u32 d = value >> 32;
__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}
-static inline uint16_t inw(uint16_t port)
+static inline u16 inw(u16 port)
{
- uint16_t tmp;
+ u16 tmp;
__asm__ __volatile__("in %%dx, %%ax"
: /* output */ "=a" (tmp)
@@ -476,101 +487,124 @@ static inline uint16_t inw(uint16_t port)
return tmp;
}
-static inline uint16_t get_es(void)
+static inline u16 get_es(void)
{
- uint16_t es;
+ u16 es;
__asm__ __volatile__("mov %%es, %[es]"
: /* output */ [es]"=rm"(es));
return es;
}
-static inline uint16_t get_cs(void)
+static inline u16 get_cs(void)
{
- uint16_t cs;
+ u16 cs;
__asm__ __volatile__("mov %%cs, %[cs]"
: /* output */ [cs]"=rm"(cs));
return cs;
}
-static inline uint16_t get_ss(void)
+static inline u16 get_ss(void)
{
- uint16_t ss;
+ u16 ss;
__asm__ __volatile__("mov %%ss, %[ss]"
: /* output */ [ss]"=rm"(ss));
return ss;
}
-static inline uint16_t get_ds(void)
+static inline u16 get_ds(void)
{
- uint16_t ds;
+ u16 ds;
__asm__ __volatile__("mov %%ds, %[ds]"
: /* output */ [ds]"=rm"(ds));
return ds;
}
-static inline uint16_t get_fs(void)
+static inline u16 get_fs(void)
{
- uint16_t fs;
+ u16 fs;
__asm__ __volatile__("mov %%fs, %[fs]"
: /* output */ [fs]"=rm"(fs));
return fs;
}
-static inline uint16_t get_gs(void)
+static inline u16 get_gs(void)
{
- uint16_t gs;
+ u16 gs;
__asm__ __volatile__("mov %%gs, %[gs]"
: /* output */ [gs]"=rm"(gs));
return gs;
}
-static inline uint16_t get_tr(void)
+static inline u16 get_tr(void)
{
- uint16_t tr;
+ u16 tr;
__asm__ __volatile__("str %[tr]"
: /* output */ [tr]"=rm"(tr));
return tr;
}
-static inline uint64_t get_cr0(void)
+static inline u64 get_cr0(void)
{
- uint64_t cr0;
+ u64 cr0;
__asm__ __volatile__("mov %%cr0, %[cr0]"
: /* output */ [cr0]"=r"(cr0));
return cr0;
}
-static inline uint64_t get_cr3(void)
+static inline void set_cr0(u64 val)
{
- uint64_t cr3;
+ __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory");
+}
+
+static inline u64 get_cr3(void)
+{
+ u64 cr3;
__asm__ __volatile__("mov %%cr3, %[cr3]"
: /* output */ [cr3]"=r"(cr3));
return cr3;
}
-static inline uint64_t get_cr4(void)
+static inline void set_cr3(u64 val)
+{
+ __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory");
+}
+
+static inline u64 get_cr4(void)
{
- uint64_t cr4;
+ u64 cr4;
__asm__ __volatile__("mov %%cr4, %[cr4]"
: /* output */ [cr4]"=r"(cr4));
return cr4;
}
-static inline void set_cr4(uint64_t val)
+static inline void set_cr4(u64 val)
{
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline u64 get_cr8(void)
+{
+ u64 cr8;
+
+ __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8));
+ return cr8;
+}
+
+static inline void set_cr8(u64 val)
+{
+ __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory");
+}
+
static inline void set_idt(const struct desc_ptr *idt_desc)
{
__asm__ __volatile__("lidt %0"::"m"(*idt_desc));
@@ -617,14 +651,14 @@ static inline struct desc_ptr get_idt(void)
return idt;
}
-static inline void outl(uint16_t port, uint32_t value)
+static inline void outl(u16 port, u32 value)
{
__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}
-static inline void __cpuid(uint32_t function, uint32_t index,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
+static inline void __cpuid(u32 function, u32 index,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
{
*eax = function;
*ecx = index;
@@ -638,35 +672,35 @@ static inline void __cpuid(uint32_t function, uint32_t index,
: "memory");
}
-static inline void cpuid(uint32_t function,
- uint32_t *eax, uint32_t *ebx,
- uint32_t *ecx, uint32_t *edx)
+static inline void cpuid(u32 function,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
{
return __cpuid(function, 0, eax, ebx, ecx, edx);
}
-static inline uint32_t this_cpu_fms(void)
+static inline u32 this_cpu_fms(void)
{
- uint32_t eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(1, &eax, &ebx, &ecx, &edx);
return eax;
}
-static inline uint32_t this_cpu_family(void)
+static inline u32 this_cpu_family(void)
{
return x86_family(this_cpu_fms());
}
-static inline uint32_t this_cpu_model(void)
+static inline u32 this_cpu_model(void)
{
return x86_model(this_cpu_fms());
}
static inline bool this_cpu_vendor_string_is(const char *vendor)
{
- const uint32_t *chunk = (const uint32_t *)vendor;
- uint32_t eax, ebx, ecx, edx;
+ const u32 *chunk = (const u32 *)vendor;
+ u32 eax, ebx, ecx, edx;
cpuid(0, &eax, &ebx, &ecx, &edx);
return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
@@ -685,10 +719,14 @@ static inline bool this_cpu_is_amd(void)
return this_cpu_vendor_string_is("AuthenticAMD");
}
-static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
- uint8_t reg, uint8_t lo, uint8_t hi)
+static inline bool this_cpu_is_hygon(void)
{
- uint32_t gprs[4];
+ return this_cpu_vendor_string_is("HygonGenuine");
+}
+
+static inline u32 __this_cpu_has(u32 function, u32 index, u8 reg, u8 lo, u8 hi)
+{
+ u32 gprs[4];
__cpuid(function, index,
&gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
@@ -703,7 +741,7 @@ static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
feature.reg, feature.bit, feature.bit);
}
-static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+static inline u32 this_cpu_property(struct kvm_x86_cpu_property property)
{
return __this_cpu_has(property.function, property.index,
property.reg, property.lo_bit, property.hi_bit);
@@ -711,7 +749,7 @@ static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
{
- uint32_t max_leaf;
+ u32 max_leaf;
switch (property.function & 0xc0000000) {
case 0:
@@ -731,7 +769,7 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
{
- uint32_t nr_bits;
+ u32 nr_bits;
if (feature.f.reg == KVM_CPUID_EBX) {
nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
@@ -743,13 +781,13 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
return nr_bits > feature.f.bit || this_cpu_has(feature.f);
}
-static __always_inline uint64_t this_cpu_supported_xcr0(void)
+static __always_inline u64 this_cpu_supported_xcr0(void)
{
if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
return 0;
return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
- ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+ ((u64)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -828,7 +866,7 @@ static inline void cpu_relax(void)
static inline void udelay(unsigned long usec)
{
- uint64_t start, now, cycles;
+ u64 start, now, cycles;
GUEST_ASSERT(guest_tsc_khz);
cycles = guest_tsc_khz / 1000 * usec;
@@ -859,8 +897,8 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state);
const struct kvm_msr_list *kvm_get_msr_index_list(void);
const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
-uint64_t kvm_get_feature_msr(uint64_t msr_index);
+bool kvm_msr_is_in_save_restore_list(u32 msr_index);
+u64 kvm_get_feature_msr(u64 msr_index);
static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
struct kvm_msrs *msrs)
@@ -915,20 +953,20 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
}
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index);
+ u32 function, u32 index);
const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-static inline uint32_t kvm_cpu_fms(void)
+static inline u32 kvm_cpu_fms(void)
{
return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
}
-static inline uint32_t kvm_cpu_family(void)
+static inline u32 kvm_cpu_family(void)
{
return x86_family(kvm_cpu_fms());
}
-static inline uint32_t kvm_cpu_model(void)
+static inline u32 kvm_cpu_model(void)
{
return x86_model(kvm_cpu_fms());
}
@@ -941,17 +979,17 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
}
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_property property);
+u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property);
-static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+static inline u32 kvm_cpu_property(struct kvm_x86_cpu_property property)
{
return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
}
static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
{
- uint32_t max_leaf;
+ u32 max_leaf;
switch (property.function & 0xc0000000) {
case 0:
@@ -971,7 +1009,7 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
{
- uint32_t nr_bits;
+ u32 nr_bits;
if (feature.f.reg == KVM_CPUID_EBX) {
nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
@@ -983,13 +1021,13 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
}
-static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+static __always_inline u64 kvm_cpu_supported_xcr0(void)
{
if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
return 0;
return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
- ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+ ((u64)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}
static inline size_t kvm_cpuid2_size(int nr_entries)
@@ -1023,8 +1061,8 @@ static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
}
static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
- uint32_t function,
- uint32_t index)
+ u32 function,
+ u32 index)
{
TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
@@ -1035,7 +1073,7 @@ static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *v
}
static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
- uint32_t function)
+ u32 function)
{
return __vcpu_get_cpuid_entry(vcpu, function, 0);
}
@@ -1065,10 +1103,10 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
- uint32_t value);
-void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
+ u32 value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, u8 maxphyaddr);
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function);
static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_feature feature)
@@ -1096,8 +1134,8 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
}
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value);
/*
* Assert on an MSR access(es) and pretty print the MSR name when possible.
@@ -1122,14 +1160,14 @@ do { \
* is changing, etc. This is NOT an exhaustive list! The intent is to filter
* out MSRs that are not durable _and_ that a selftest wants to write.
*/
-static inline bool is_durable_msr(uint32_t msr)
+static inline bool is_durable_msr(u32 msr)
{
return msr != MSR_IA32_TSC;
}
#define vcpu_set_msr(vcpu, msr, val) \
do { \
- uint64_t r, v = val; \
+ u64 r, v = val; \
\
TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
"KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
@@ -1141,36 +1179,41 @@ do { \
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
struct ex_regs {
- uint64_t rax, rcx, rdx, rbx;
- uint64_t rbp, rsi, rdi;
- uint64_t r8, r9, r10, r11;
- uint64_t r12, r13, r14, r15;
- uint64_t vector;
- uint64_t error_code;
- uint64_t rip;
- uint64_t cs;
- uint64_t rflags;
+ u64 rax, rcx, rdx, rbx;
+ u64 rbp, rsi, rdi;
+ u64 r8, r9, r10, r11;
+ u64 r12, r13, r14, r15;
+ u64 vector;
+ u64 error_code;
+ u64 rip;
+ u64 cs;
+ u64 rflags;
};
struct idt_entry {
- uint16_t offset0;
- uint16_t selector;
- uint16_t ist : 3;
- uint16_t : 5;
- uint16_t type : 4;
- uint16_t : 1;
- uint16_t dpl : 2;
- uint16_t p : 1;
- uint16_t offset1;
- uint32_t offset2; uint32_t reserved;
+ u16 offset0;
+ u16 selector;
+ u16 ist : 3;
+ u16 : 5;
+ u16 type : 4;
+ u16 : 1;
+ u16 dpl : 2;
+ u16 p : 1;
+ u16 offset1;
+ u32 offset2; u32 reserved;
};
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+/*
+ * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
+ * used to signal "no expcetion".
+ */
+#define KVM_MAGIC_DE_VECTOR 0xff
+
/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
@@ -1218,8 +1261,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
#define kvm_asm_safe(insn, inputs...) \
({ \
- uint64_t ign_error_code; \
- uint8_t vector; \
+ u64 ign_error_code; \
+ u8 vector; \
\
asm volatile(KVM_ASM_SAFE(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
@@ -1230,7 +1273,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
#define kvm_asm_safe_ec(insn, error_code, inputs...) \
({ \
- uint8_t vector; \
+ u8 vector; \
\
asm volatile(KVM_ASM_SAFE(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
@@ -1241,10 +1284,10 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
#define kvm_asm_safe_fep(insn, inputs...) \
({ \
- uint64_t ign_error_code; \
- uint8_t vector; \
+ u64 ign_error_code; \
+ u8 vector; \
\
- asm volatile(KVM_ASM_SAFE(insn) \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
: inputs \
: KVM_ASM_SAFE_CLOBBERS); \
@@ -1253,7 +1296,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
({ \
- uint8_t vector; \
+ u8 vector; \
\
asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
@@ -1263,11 +1306,11 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
})
#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
-static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
+static inline u8 insn##_safe ##_fep(u32 idx, u64 *val) \
{ \
- uint64_t error_code; \
- uint8_t vector; \
- uint32_t a, d; \
+ u64 error_code; \
+ u8 vector; \
+ u32 a, d; \
\
asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
: "=a"(a), "=d"(d), \
@@ -1275,7 +1318,7 @@ static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
: "c"(idx) \
: KVM_ASM_SAFE_CLOBBERS); \
\
- *val = (uint64_t)a | ((uint64_t)d << 32); \
+ *val = (u64)a | ((u64)d << 32); \
return vector; \
}
@@ -1291,12 +1334,12 @@ BUILD_READ_U64_SAFE_HELPERS(rdmsr)
BUILD_READ_U64_SAFE_HELPERS(rdpmc)
BUILD_READ_U64_SAFE_HELPERS(xgetbv)
-static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+static inline u8 wrmsr_safe(u32 msr, u64 val)
{
return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}
-static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+static inline u8 xsetbv_safe(u32 index, u64 value)
{
u32 eax = value;
u32 edx = value >> 32;
@@ -1306,6 +1349,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
bool kvm_is_tdp_enabled(void);
+static inline bool get_kvm_intel_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_intel", param);
+}
+
+static inline bool get_kvm_amd_param_bool(const char *param)
+{
+ return kvm_get_module_param_bool("kvm_amd", param);
+}
+
+static inline int get_kvm_intel_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_intel", param);
+}
+
+static inline int get_kvm_amd_param_integer(const char *param)
+{
+ return kvm_get_module_param_integer("kvm_amd", param);
+}
+
static inline bool kvm_is_pmu_enabled(void)
{
return get_kvm_param_bool("enable_pmu");
@@ -1316,30 +1379,80 @@ static inline bool kvm_is_forced_emulation_enabled(void)
return !!get_kvm_param_integer("force_emulation_prefix");
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level);
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+static inline bool kvm_is_unrestricted_guest_enabled(void)
+{
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+static inline bool kvm_is_ignore_msrs(void)
+{
+ return get_kvm_param_bool("ignore_msrs");
+}
+
+static inline bool kvm_is_lbrv_enabled(void)
+{
+ return !!get_kvm_amd_param_integer("lbrv");
+}
+
+u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva);
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3);
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3);
+u64 __xen_hypercall(u64 nr, u64 a0, void *a1);
+void xen_hypercall(u64 nr, u64 a0, void *a1);
-static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
- uint64_t size, uint64_t flags)
+static inline u64 __kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags)
{
return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
}
-static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
- uint64_t flags)
+static inline void kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags)
{
- uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+ u64 ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
GUEST_ASSERT(!ret);
}
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
+void __vm_xsave_require_permission(u64 xfeature, const char *name);
#define vm_xsave_require_permission(xfeature) \
__vm_xsave_require_permission(xfeature, #xfeature)
@@ -1350,7 +1463,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
- PG_LEVEL_NUM
+ PG_LEVEL_256T
};
#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
@@ -1360,9 +1473,51 @@ enum pg_level {
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t nr_bytes, int level);
+#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
+#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
+#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
+#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
+#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
+#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
+#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx)
+#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
+#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
+#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set)
+
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte) \
+ (PTE_PRESENT_MASK(mmu) ? \
+ !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
+ !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
+#define is_executable_pte(mmu, pte) \
+ ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
+#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
+#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
+#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
+#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
+#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
+#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))
+
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks);
+
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva,
+ gpa_t gpa, int level);
+void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa,
+ u64 nr_bytes, int level);
+
+void vm_enable_tdp(struct kvm_vm *vm);
+bool kvm_cpu_has_tdp(void);
+void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size);
+void tdp_identity_map_default_memslots(struct kvm_vm *vm);
+void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size);
+u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa);
/*
* Basic CPU control in CR0
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..1af44c151d60 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -25,19 +25,51 @@ enum sev_guest_state {
#define SEV_POLICY_NO_DBG (1UL << 0)
#define SEV_POLICY_ES (1UL << 2)
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
#define GHCB_MSR_TERM_REQ 0x100
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
+void sev_vm_launch(struct kvm_vm *vm, u32 policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, u64 policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code,
struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
* The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
* instead of a proper struct. The size of the parameter is embedded in the
* ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
@@ -53,7 +85,7 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
unsigned long raw; \
} sev_cmd = { .c = { \
.id = (cmd), \
- .data = (uint64_t)(arg), \
+ .data = (u64)(arg), \
.sev_fd = (vm)->arch.sev_fd, \
} }; \
\
@@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
void sev_vm_init(struct kvm_vm *vm);
void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
@@ -82,8 +120,8 @@ static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
}
-static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
- uint64_t size)
+static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa,
+ u64 size)
{
struct kvm_sev_launch_update_data update_data = {
.uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
@@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
}
+static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa,
+ u64 hva, u64 size, u8 type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h
new file mode 100644
index 000000000000..2d1afa09819b
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86/smm.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef SELFTEST_KVM_SMM_H
+#define SELFTEST_KVM_SMM_H
+
+#include "kvm_util.h"
+
+#define SMRAM_SIZE 65536
+#define SMRAM_MEMSLOT ((1 << 16) | 1)
+#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa,
+ const void *smi_handler, size_t handler_size);
+
+void inject_smi(struct kvm_vcpu *vcpu);
+
+#endif /* SELFTEST_KVM_SMM_H */
diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 29cffd0a9181..c8539166270e 100644
--- a/tools/testing/selftests/kvm/include/x86/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -92,19 +92,18 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
- u32 exit_code;
- u32 exit_code_hi;
+ u64 exit_code;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
u32 exit_int_info_err;
- u64 nested_ctl;
+ u64 misc_ctl;
u64 avic_vapic_bar;
u8 reserved_4[8];
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
- u64 virt_ext;
+ u64 misc_ctl2;
u32 clean;
u32 reserved_5;
u64 next_rip;
@@ -156,9 +155,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define AVIC_ENABLE_SHIFT 31
#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
-#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
-#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
-
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
@@ -176,8 +172,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL
#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL
-#define SVM_NESTED_CTL_NP_ENABLE BIT(0)
-#define SVM_NESTED_CTL_SEV_ENABLE BIT(1)
+#define SVM_MISC_ENABLE_NP BIT(0)
+#define SVM_MISC_ENABLE_SEV BIT(1)
+
+#define SVM_MISC2_ENABLE_V_LBR BIT_ULL(0)
+#define SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE BIT_ULL(1)
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
index b74c6dcddcbd..6c013eb838be 100644
--- a/tools/testing/selftests/kvm/include/x86/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -16,17 +16,20 @@ struct svm_test_data {
/* VMCB */
struct vmcb *vmcb; /* gva */
void *vmcb_hva;
- uint64_t vmcb_gpa;
+ u64 vmcb_gpa;
/* host state-save area */
struct vmcb_save_area *save_area; /* gva */
void *save_area_hva;
- uint64_t save_area_gpa;
+ u64 save_area_gpa;
/* MSR-Bitmap */
void *msr; /* gva */
void *msr_hva;
- uint64_t msr_gpa;
+ u64 msr_gpa;
+
+ /* NPT */
+ u64 ncr3_gpa;
};
static inline void vmmcall(void)
@@ -53,9 +56,15 @@ static inline void vmmcall(void)
"clgi\n" \
)
-struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
+struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
+void run_guest(struct vmcb *vmcb, u64 vmcb_gpa);
+
+static inline bool kvm_cpu_has_npt(void)
+{
+ return kvm_cpu_has(X86_FEATURE_NPT);
+}
+void vm_enable_npt(struct kvm_vm *vm);
int open_sev_dev_path_or_exit(void);
diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
index d3825dcc3cd9..0e4950041e3e 100644
--- a/tools/testing/selftests/kvm/include/x86/ucall.h
+++ b/tools/testing/selftests/kvm/include/x86/ucall.h
@@ -6,7 +6,7 @@
#define UCALL_EXIT_REASON KVM_EXIT_IO
-static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa)
{
}
diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index edb3c391b982..90fffaf91595 100644
--- a/tools/testing/selftests/kvm/include/x86/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -285,16 +285,16 @@ enum vmcs_field {
};
struct vmx_msr_entry {
- uint32_t index;
- uint32_t reserved;
- uint64_t value;
+ u32 index;
+ u32 reserved;
+ u64 value;
} __attribute__ ((aligned(16)));
#include "evmcs.h"
-static inline int vmxon(uint64_t phys)
+static inline int vmxon(u64 phys)
{
- uint8_t ret;
+ u8 ret;
__asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
: [ret]"=rm"(ret)
@@ -309,9 +309,9 @@ static inline void vmxoff(void)
__asm__ __volatile__("vmxoff");
}
-static inline int vmclear(uint64_t vmcs_pa)
+static inline int vmclear(u64 vmcs_pa)
{
- uint8_t ret;
+ u8 ret;
__asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
: [ret]"=rm"(ret)
@@ -321,9 +321,9 @@ static inline int vmclear(uint64_t vmcs_pa)
return ret;
}
-static inline int vmptrld(uint64_t vmcs_pa)
+static inline int vmptrld(u64 vmcs_pa)
{
- uint8_t ret;
+ u8 ret;
if (enable_evmcs)
return -1;
@@ -336,10 +336,10 @@ static inline int vmptrld(uint64_t vmcs_pa)
return ret;
}
-static inline int vmptrst(uint64_t *value)
+static inline int vmptrst(u64 *value)
{
- uint64_t tmp;
- uint8_t ret;
+ u64 tmp;
+ u8 ret;
if (enable_evmcs)
return evmcs_vmptrst(value);
@@ -356,9 +356,9 @@ static inline int vmptrst(uint64_t *value)
* A wrapper around vmptrst that ignores errors and returns zero if the
* vmptrst instruction fails.
*/
-static inline uint64_t vmptrstz(void)
+static inline u64 vmptrstz(void)
{
- uint64_t value = 0;
+ u64 value = 0;
vmptrst(&value);
return value;
}
@@ -391,8 +391,8 @@ static inline int vmlaunch(void)
"pop %%rcx;"
"pop %%rbp;"
: [ret]"=&a"(ret)
- : [host_rsp]"r"((uint64_t)HOST_RSP),
- [host_rip]"r"((uint64_t)HOST_RIP)
+ : [host_rsp]"r"((u64)HOST_RSP),
+ [host_rip]"r"((u64)HOST_RIP)
: "memory", "cc", "rbx", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15");
return ret;
@@ -426,8 +426,8 @@ static inline int vmresume(void)
"pop %%rcx;"
"pop %%rbp;"
: [ret]"=&a"(ret)
- : [host_rsp]"r"((uint64_t)HOST_RSP),
- [host_rip]"r"((uint64_t)HOST_RIP)
+ : [host_rsp]"r"((u64)HOST_RSP),
+ [host_rip]"r"((u64)HOST_RIP)
: "memory", "cc", "rbx", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15");
return ret;
@@ -447,10 +447,10 @@ static inline void vmcall(void)
"r10", "r11", "r12", "r13", "r14", "r15");
}
-static inline int vmread(uint64_t encoding, uint64_t *value)
+static inline int vmread(u64 encoding, u64 *value)
{
- uint64_t tmp;
- uint8_t ret;
+ u64 tmp;
+ u8 ret;
if (enable_evmcs)
return evmcs_vmread(encoding, value);
@@ -468,16 +468,16 @@ static inline int vmread(uint64_t encoding, uint64_t *value)
* A wrapper around vmread that ignores errors and returns zero if the
* vmread instruction fails.
*/
-static inline uint64_t vmreadz(uint64_t encoding)
+static inline u64 vmreadz(u64 encoding)
{
- uint64_t value = 0;
+ u64 value = 0;
vmread(encoding, &value);
return value;
}
-static inline int vmwrite(uint64_t encoding, uint64_t value)
+static inline int vmwrite(u64 encoding, u64 value)
{
- uint8_t ret;
+ u8 ret;
if (enable_evmcs)
return evmcs_vmwrite(encoding, value);
@@ -490,43 +490,41 @@ static inline int vmwrite(uint64_t encoding, uint64_t value)
return ret;
}
-static inline uint32_t vmcs_revision(void)
+static inline u32 vmcs_revision(void)
{
return rdmsr(MSR_IA32_VMX_BASIC);
}
struct vmx_pages {
void *vmxon_hva;
- uint64_t vmxon_gpa;
+ u64 vmxon_gpa;
void *vmxon;
void *vmcs_hva;
- uint64_t vmcs_gpa;
+ u64 vmcs_gpa;
void *vmcs;
void *msr_hva;
- uint64_t msr_gpa;
+ u64 msr_gpa;
void *msr;
void *shadow_vmcs_hva;
- uint64_t shadow_vmcs_gpa;
+ u64 shadow_vmcs_gpa;
void *shadow_vmcs;
void *vmread_hva;
- uint64_t vmread_gpa;
+ u64 vmread_gpa;
void *vmread;
void *vmwrite_hva;
- uint64_t vmwrite_gpa;
+ u64 vmwrite_gpa;
void *vmwrite;
- void *eptp_hva;
- uint64_t eptp_gpa;
- void *eptp;
-
void *apic_access_hva;
- uint64_t apic_access_gpa;
+ u64 apic_access_gpa;
void *apic_access;
+
+ u64 eptp_gpa;
};
union vmx_basic {
@@ -552,24 +550,15 @@ union vmx_ctrl_msr {
};
};
-struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
bool ept_1g_pages_supported(void);
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr);
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size);
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot);
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size);
bool kvm_cpu_has_ept(void);
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void vm_enable_ept(struct kvm_vm *vm);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c
new file mode 100644
index 000000000000..5d7590d01868
--- /dev/null
+++ b/tools/testing/selftests/kvm/irqfd_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY 0x20
+#define GSI_BASE_SECONDARY 0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+ int r, i;
+
+ /*
+ * The secondary task can encounter EBADF since the primary can close
+ * the eventfd at any time. And because the primary can recreate the
+ * eventfd, at the safe fd in the file table, the secondary can also
+ * encounter "unexpected" success, e.g. if the close+recreate happens
+ * between the first and second assignments. The secondary's role is
+ * mostly to antagonize KVM, not to detect bugs.
+ */
+ for (i = 0; i < 2; i++) {
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+ TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+ "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+ r, errno);
+
+ /* De-assign should succeed unless the eventfd was closed. */
+ r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ TEST_ASSERT(!r || errno == EBADF,
+ "De-assign should succeed unless the fd was closed");
+ }
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+ while (!READ_ONCE(done)) {
+ juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+ juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+ }
+
+ return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+ int r1, r2;
+
+ /*
+ * At least one of the assigns should fail. KVM disallows assigning a
+ * single eventfd to multiple GSIs (or VMs), so it's possible that both
+ * assignments can fail, too.
+ */
+ r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+ TEST_ASSERT(!r1 || errno == EBUSY,
+ "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+ r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+ TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+ "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+ r1, r2, errno);
+
+ /*
+ * De-assign should always succeed, even if the corresponding assign
+ * failed.
+ */
+ kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t racing_thread;
+ struct kvm_vcpu *unused;
+ int r, i;
+
+ TEST_REQUIRE(kvm_arch_has_default_irqchip());
+
+ /*
+ * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also
+ * create an unused vCPU as certain architectures (like arm64) need to
+ * complete IRQ chip initialization after all possible vCPUs for a VM
+ * have been created.
+ */
+ vm1 = vm_create_with_one_vcpu(&unused, NULL);
+ vm2 = vm_create_with_one_vcpu(&unused, NULL);
+
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ kvm_irqfd(vm1, 10, __eventfd, 0);
+
+ r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+ TEST_ASSERT(r && errno == EBUSY,
+ "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+ /*
+ * De-assign all eventfds, along with multiple eventfds that were never
+ * assigned. KVM's ABI is that de-assign is allowed so long as the
+ * eventfd itself is valid.
+ */
+ kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+ kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+ close(__eventfd);
+
+ pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+ for (i = 0; i < 10000; i++) {
+ WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+ juggle_eventfd_primary(vm1, __eventfd);
+ juggle_eventfd_primary(vm2, __eventfd);
+ close(__eventfd);
+ }
+
+ WRITE_ONCE(done, true);
+ pthread_join(racing_thread, NULL);
+}
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index f02355c3c4c2..b7dbde9c0843 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -239,14 +239,14 @@ int main(int argc, char *argv[])
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
- stats_test(dup(vm_stats_fds));
+ stats_test(kvm_dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
- stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(kvm_dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include "test_util.h"
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- /*
- * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
- * an arbitrary number for everything else.
- */
- int nr_fds_wanted = kvm_max_vcpus + 100;
- struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
- /*
- * Check that we're allowed to open nr_fds_wanted file descriptors and
- * try raising the limits if needed.
- */
- TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
- if (rl.rlim_cur < nr_fds_wanted) {
- rl.rlim_cur = nr_fds_wanted;
- if (rl.rlim_max < nr_fds_wanted) {
- int old_rlim_max = rl.rlim_max;
- rl.rlim_max = nr_fds_wanted;
-
- int r = setrlimit(RLIMIT_NOFILE, &rl);
- __TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
- old_rlim_max, nr_fds_wanted);
- } else {
- TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
- }
- }
+ kvm_set_files_rlimit(kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index dd8b12f626d3..fc5242fb956f 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -46,12 +46,12 @@ static const char * const test_stage_string[] = {
struct test_args {
struct kvm_vm *vm;
- uint64_t guest_test_virt_mem;
- uint64_t host_page_size;
- uint64_t host_num_pages;
- uint64_t large_page_size;
- uint64_t large_num_pages;
- uint64_t host_pages_per_lpage;
+ u64 guest_test_virt_mem;
+ u64 host_page_size;
+ u64 host_num_pages;
+ u64 large_page_size;
+ u64 large_num_pages;
+ u64 host_pages_per_lpage;
enum vm_mem_backing_src_type src_type;
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
};
@@ -63,7 +63,7 @@ struct test_args {
static enum test_stage guest_test_stage;
/* Host variables */
-static uint32_t nr_vcpus = 1;
+static u32 nr_vcpus = 1;
static struct test_args test_args;
static enum test_stage *current_stage;
static bool host_quit;
@@ -77,19 +77,19 @@ static sem_t test_stage_completed;
* This will be set to the topmost valid physical address minus
* the test memory size.
*/
-static uint64_t guest_test_phys_mem;
+static u64 guest_test_phys_mem;
/*
* Guest virtual memory offset of the testing memory slot.
* Must not conflict with identity mapped test code.
*/
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(bool do_write)
{
struct test_args *p = &test_args;
enum test_stage *current_stage = &guest_test_stage;
- uint64_t addr;
+ u64 addr;
int i, j;
while (true) {
@@ -113,9 +113,9 @@ static void guest_code(bool do_write)
case KVM_CREATE_MAPPINGS:
for (i = 0; i < p->large_num_pages; i++) {
if (do_write)
- *(uint64_t *)addr = 0x0123456789ABCDEF;
+ *(u64 *)addr = 0x0123456789ABCDEF;
else
- READ_ONCE(*(uint64_t *)addr);
+ READ_ONCE(*(u64 *)addr);
addr += p->large_page_size;
}
@@ -131,7 +131,7 @@ static void guest_code(bool do_write)
case KVM_UPDATE_MAPPINGS:
if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
for (i = 0; i < p->host_num_pages; i++) {
- *(uint64_t *)addr = 0x0123456789ABCDEF;
+ *(u64 *)addr = 0x0123456789ABCDEF;
addr += p->host_page_size;
}
break;
@@ -142,7 +142,7 @@ static void guest_code(bool do_write)
* Write to the first host page in each large
* page region, and triger break of large pages.
*/
- *(uint64_t *)addr = 0x0123456789ABCDEF;
+ *(u64 *)addr = 0x0123456789ABCDEF;
/*
* Access the middle host pages in each large
@@ -152,7 +152,7 @@ static void guest_code(bool do_write)
*/
addr += p->large_page_size / 2;
for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
- READ_ONCE(*(uint64_t *)addr);
+ READ_ONCE(*(u64 *)addr);
addr += p->host_page_size;
}
}
@@ -167,7 +167,7 @@ static void guest_code(bool do_write)
*/
case KVM_ADJUST_MAPPINGS:
for (i = 0; i < p->host_num_pages; i++) {
- READ_ONCE(*(uint64_t *)addr);
+ READ_ONCE(*(u64 *)addr);
addr += p->host_page_size;
}
break;
@@ -227,8 +227,8 @@ static void *vcpu_worker(void *data)
}
struct test_params {
- uint64_t phys_offset;
- uint64_t test_mem_size;
+ u64 phys_offset;
+ u64 test_mem_size;
enum vm_mem_backing_src_type src_type;
};
@@ -237,12 +237,12 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
int ret;
struct test_params *p = arg;
enum vm_mem_backing_src_type src_type = p->src_type;
- uint64_t large_page_size = get_backing_src_pagesz(src_type);
- uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
- uint64_t host_page_size = getpagesize();
- uint64_t test_mem_size = p->test_mem_size;
- uint64_t guest_num_pages;
- uint64_t alignment;
+ u64 large_page_size = get_backing_src_pagesz(src_type);
+ u64 guest_page_size = vm_guest_mode_params[mode].page_size;
+ u64 host_page_size = getpagesize();
+ u64 test_mem_size = p->test_mem_size;
+ u64 guest_num_pages;
+ u64 alignment;
void *host_test_mem;
struct kvm_vm *vm;
@@ -261,9 +261,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
guest_page_size;
else
guest_test_phys_mem = p->phys_offset;
-#ifdef __s390x__
- alignment = max(0x100000UL, alignment);
-#endif
guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
/* Set up the shared data structure test_args */
@@ -284,7 +281,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
- host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
+ host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem);
/* Export shared structure test_args to guest */
sync_global_to_guest(vm, test_args);
@@ -295,7 +292,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
ret = sem_init(&test_stage_completed, 0, 0);
TEST_ASSERT(ret == 0, "Error in sem_init");
- current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));
+ current_stage = addr_gva2hva(vm, (gva_t)(&guest_test_stage));
*current_stage = NUM_TEST_STAGES;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -307,7 +304,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
pr_info("Guest physical test memory offset: 0x%lx\n",
guest_test_phys_mem);
pr_info("Host virtual test memory offset: 0x%lx\n",
- (uint64_t)host_test_mem);
+ (u64)host_test_mem);
pr_info("Number of testing vCPUs: %d\n", nr_vcpus);
return vm;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..011dfe1dfcb3 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
@@ -50,7 +50,7 @@ static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
void gic_init(enum gic_type type, unsigned int nr_cpus)
{
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
GUEST_ASSERT(type < GIC_TYPE_MAX);
GUEST_ASSERT(nr_cpus);
@@ -73,7 +73,7 @@ void gic_irq_disable(unsigned int intid)
unsigned int gic_get_and_ack_irq(void)
{
- uint64_t irqstat;
+ u64 irqstat;
unsigned int intid;
GUEST_ASSERT(gic_common_ops);
@@ -102,7 +102,7 @@ void gic_set_eoi_split(bool split)
gic_common_ops->gic_set_eoi_split(split);
}
-void gic_set_priority_mask(uint64_t pmr)
+void gic_set_priority_mask(u64 pmr)
{
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_set_priority_mask(pmr);
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
+
+void gic_irq_set_group(unsigned int intid, bool group)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_group(intid, group);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..6d393f5c5685 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
@@ -12,19 +12,20 @@ struct gic_common_ops {
void (*gic_cpu_init)(unsigned int cpu);
void (*gic_irq_enable)(unsigned int intid);
void (*gic_irq_disable)(unsigned int intid);
- uint64_t (*gic_read_iar)(void);
- void (*gic_write_eoir)(uint32_t irq);
- void (*gic_write_dir)(uint32_t irq);
+ u64 (*gic_read_iar)(void);
+ void (*gic_write_eoir)(u32 irq);
+ void (*gic_write_dir)(u32 irq);
void (*gic_set_eoi_split)(bool split);
- void (*gic_set_priority_mask)(uint64_t mask);
- void (*gic_set_priority)(uint32_t intid, uint32_t prio);
- void (*gic_irq_set_active)(uint32_t intid);
- void (*gic_irq_clear_active)(uint32_t intid);
- bool (*gic_irq_get_active)(uint32_t intid);
- void (*gic_irq_set_pending)(uint32_t intid);
- void (*gic_irq_clear_pending)(uint32_t intid);
- bool (*gic_irq_get_pending)(uint32_t intid);
- void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+ void (*gic_set_priority_mask)(u64 mask);
+ void (*gic_set_priority)(u32 intid, u32 prio);
+ void (*gic_irq_set_active)(u32 intid);
+ void (*gic_irq_clear_active)(u32 intid);
+ bool (*gic_irq_get_active)(u32 intid);
+ void (*gic_irq_set_pending)(u32 intid);
+ void (*gic_irq_clear_pending)(u32 intid);
+ bool (*gic_irq_get_pending)(u32 intid);
+ void (*gic_irq_set_config)(u32 intid, bool is_edge);
+ void (*gic_irq_set_group)(u32 intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..a99a53accfe9 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
@@ -50,13 +50,13 @@ static void gicv3_gicd_wait_for_rwp(void)
}
}
-static inline volatile void *gicr_base_cpu(uint32_t cpu)
+static inline volatile void *gicr_base_cpu(u32 cpu)
{
/* Align all the redistributors sequentially */
return GICR_BASE_GVA + cpu * SZ_64K * 2;
}
-static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
+static void gicv3_gicr_wait_for_rwp(u32 cpu)
{
unsigned int count = 100000; /* 1s */
@@ -66,7 +66,7 @@ static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
}
}
-static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+static void gicv3_wait_for_rwp(u32 cpu_or_dist)
{
if (cpu_or_dist & DIST_BIT)
gicv3_gicd_wait_for_rwp();
@@ -91,34 +91,34 @@ static enum gicv3_intid_range get_intid_range(unsigned int intid)
return INVALID_RANGE;
}
-static uint64_t gicv3_read_iar(void)
+static u64 gicv3_read_iar(void)
{
- uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+ u64 irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
dsb(sy);
return irqstat;
}
-static void gicv3_write_eoir(uint32_t irq)
+static void gicv3_write_eoir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
isb();
}
-static void gicv3_write_dir(uint32_t irq)
+static void gicv3_write_dir(u32 irq)
{
write_sysreg_s(irq, SYS_ICC_DIR_EL1);
isb();
}
-static void gicv3_set_priority_mask(uint64_t mask)
+static void gicv3_set_priority_mask(u64 mask)
{
write_sysreg_s(mask, SYS_ICC_PMR_EL1);
}
static void gicv3_set_eoi_split(bool split)
{
- uint32_t val;
+ u32 val;
/*
* All other fields are read-only, so no need to read CTLR first. In
@@ -129,29 +129,29 @@ static void gicv3_set_eoi_split(bool split)
isb();
}
-uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+u32 gicv3_reg_readl(u32 cpu_or_dist, u64 offset)
{
volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
: sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
return readl(base + offset);
}
-void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+void gicv3_reg_writel(u32 cpu_or_dist, u64 offset, u32 reg_val)
{
volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
: sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
writel(reg_val, base + offset);
}
-uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+u32 gicv3_getl_fields(u32 cpu_or_dist, u64 offset, u32 mask)
{
return gicv3_reg_readl(cpu_or_dist, offset) & mask;
}
-void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
- uint32_t mask, uint32_t reg_val)
+void gicv3_setl_fields(u32 cpu_or_dist, u64 offset,
+ u32 mask, u32 reg_val)
{
- uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+ u32 tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
tmp |= (reg_val & mask);
gicv3_reg_writel(cpu_or_dist, offset, tmp);
@@ -165,14 +165,14 @@ void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
* map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
* marked as "Reserved" in the Distributor map.
*/
-static void gicv3_access_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field,
- bool write, uint32_t *val)
+static void gicv3_access_reg(u32 intid, u64 offset,
+ u32 reg_bits, u32 bits_per_field,
+ bool write, u32 *val)
{
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
enum gicv3_intid_range intid_range = get_intid_range(intid);
- uint32_t fields_per_reg, index, mask, shift;
- uint32_t cpu_or_dist;
+ u32 fields_per_reg, index, mask, shift;
+ u32 cpu_or_dist;
GUEST_ASSERT(bits_per_field <= reg_bits);
GUEST_ASSERT(!write || *val < (1U << bits_per_field));
@@ -197,32 +197,32 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset,
*val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
}
-static void gicv3_write_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+static void gicv3_write_reg(u32 intid, u64 offset,
+ u32 reg_bits, u32 bits_per_field, u32 val)
{
gicv3_access_reg(intid, offset, reg_bits,
bits_per_field, true, &val);
}
-static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
- uint32_t reg_bits, uint32_t bits_per_field)
+static u32 gicv3_read_reg(u32 intid, u64 offset,
+ u32 reg_bits, u32 bits_per_field)
{
- uint32_t val;
+ u32 val;
gicv3_access_reg(intid, offset, reg_bits,
bits_per_field, false, &val);
return val;
}
-static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+static void gicv3_set_priority(u32 intid, u32 prio)
{
gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
}
/* Sets the intid to be level-sensitive or edge-triggered. */
-static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+static void gicv3_irq_set_config(u32 intid, bool is_edge)
{
- uint32_t val;
+ u32 val;
/* N/A for private interrupts. */
GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
@@ -230,57 +230,57 @@ static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
}
-static void gicv3_irq_enable(uint32_t intid)
+static void gicv3_irq_enable(u32 intid)
{
bool is_spi = get_intid_range(intid) == SPI_RANGE;
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
}
-static void gicv3_irq_disable(uint32_t intid)
+static void gicv3_irq_disable(u32 intid)
{
bool is_spi = get_intid_range(intid) == SPI_RANGE;
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
}
-static void gicv3_irq_set_active(uint32_t intid)
+static void gicv3_irq_set_active(u32 intid)
{
gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
}
-static void gicv3_irq_clear_active(uint32_t intid)
+static void gicv3_irq_clear_active(u32 intid)
{
gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
}
-static bool gicv3_irq_get_active(uint32_t intid)
+static bool gicv3_irq_get_active(u32 intid)
{
return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
}
-static void gicv3_irq_set_pending(uint32_t intid)
+static void gicv3_irq_set_pending(u32 intid)
{
gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
}
-static void gicv3_irq_clear_pending(uint32_t intid)
+static void gicv3_irq_clear_pending(u32 intid)
{
gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
}
-static bool gicv3_irq_get_pending(uint32_t intid)
+static bool gicv3_irq_get_pending(u32 intid)
{
return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
static void gicv3_enable_redist(volatile void *redist_base)
{
- uint32_t val = readl(redist_base + GICR_WAKER);
+ u32 val = readl(redist_base + GICR_WAKER);
unsigned int count = 100000; /* 1s */
val &= ~GICR_WAKER_ProcessorSleep;
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
+static void gicv3_set_group(u32 intid, bool grp)
+{
+ u32 cpu_or_dist;
+ u32 val;
+
+ cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
+ val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
+ if (grp)
+ val |= BIT(intid % 32);
+ else
+ val &= ~BIT(intid % 32);
+ gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
+}
+
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
+ u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
+ /* Verify assumption that GICR_TYPER.Processor_number == cpu */
+ typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
+ GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
+
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+ /* Disable Group-0 interrupts */
+ write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,10 +421,11 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
+ .gic_irq_set_group = gicv3_set_group,
};
-void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
- vm_paddr_t pend_table)
+void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size,
+ gpa_t pend_table)
{
volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..1188b578121d 100644
--- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
@@ -15,6 +15,8 @@
#include "gic_v3.h"
#include "processor.h"
+#define GITS_COLLECTION_TARGET_SHIFT 16
+
static u64 its_read_u64(unsigned long offset)
{
return readq_relaxed(GITS_BASE_GVA + offset);
@@ -52,7 +54,7 @@ static unsigned long its_find_baser(unsigned int type)
return -1;
}
-static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+static void its_install_table(unsigned int type, gpa_t base, size_t size)
{
unsigned long offset = its_find_baser(type);
u64 baser;
@@ -67,7 +69,7 @@ static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
its_write_u64(offset, baser);
}
-static void its_install_cmdq(vm_paddr_t base, size_t size)
+static void its_install_cmdq(gpa_t base, size_t size)
{
u64 cbaser;
@@ -80,9 +82,8 @@ static void its_install_cmdq(vm_paddr_t base, size_t size)
its_write_u64(GITS_CBASER, cbaser);
}
-void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
- vm_paddr_t device_tbl, size_t device_tbl_sz,
- vm_paddr_t cmdq, size_t cmdq_size)
+void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl,
+ size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size)
{
u32 ctlr;
@@ -163,6 +164,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
}
+static u64 procnum_to_rdbase(u32 vcpu_id)
+{
+ return vcpu_id << GITS_COLLECTION_TARGET_SHIFT;
+}
+
#define GITS_CMDQ_POLL_ITERATIONS 0
static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
@@ -197,7 +203,7 @@ static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
}
}
-void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base,
size_t itt_size, bool valid)
{
struct its_cmd_block cmd = {};
@@ -217,7 +223,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
its_encode_cmd(&cmd, GITS_CMD_MAPC);
its_encode_collection(&cmd, collection_id);
- its_encode_target(&cmd, vcpu_id);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_encode_valid(&cmd, valid);
its_send_cmd(cmdq_base, &cmd);
@@ -246,3 +252,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
+
+void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_SYNC);
+ its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..01325bf4d36f 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -12,52 +12,48 @@
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
+#include "vgic.h"
#include <linux/bitfield.h>
#include <linux/sizes.h>
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
-static vm_vaddr_t exception_handlers;
+static gva_t exception_handlers;
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+static u64 pgd_index(struct kvm_vm *vm, gva_t gva)
{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->va_bits - shift)) - 1;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ u64 mask = (1UL << (vm->va_bits - shift)) - 1;
return (gva >> shift) & mask;
}
-static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva)
+static u64 pud_index(struct kvm_vm *vm, gva_t gva)
{
unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ u64 mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels == 4,
+ TEST_ASSERT(vm->mmu.pgtable_levels == 4,
"Mode %d does not have 4 page table levels", vm->mode);
return (gva >> shift) & mask;
}
-static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva)
+static u64 pmd_index(struct kvm_vm *vm, gva_t gva)
{
unsigned int shift = (vm->page_shift - 3) + vm->page_shift;
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ u64 mask = (1UL << (vm->page_shift - 3)) - 1;
- TEST_ASSERT(vm->pgtable_levels >= 3,
+ TEST_ASSERT(vm->mmu.pgtable_levels >= 3,
"Mode %d does not have >= 3 page table levels", vm->mode);
return (gva >> shift) & mask;
}
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
+static u64 pte_index(struct kvm_vm *vm, gva_t gva)
{
- uint64_t mask = (1UL << (vm->page_shift - 3)) - 1;
+ u64 mask = (1UL << (vm->page_shift - 3)) - 1;
return (gva >> vm->page_shift) & mask;
}
@@ -67,137 +63,150 @@ static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
(vm->pa_bits > 48 || vm->va_bits > 48);
}
-static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+static u64 addr_pte(struct kvm_vm *vm, u64 pa, u64 attrs)
{
- uint64_t pte;
+ u64 pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
return pte;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+static u64 pte_addr(struct kvm_vm *vm, u64 pte)
{
- uint64_t pa;
+ u64 pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
}
-static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
+static u64 ptrs_per_pgd(struct kvm_vm *vm)
{
- unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
+ unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift;
return 1 << (vm->va_bits - shift);
}
-static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
+static u64 __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
{
return 1 << (vm->page_shift - 3);
}
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
-static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa,
+ u64 flags)
{
- uint8_t attr_idx = flags & 7;
- uint64_t *ptep;
+ u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ u64 pg_attr;
+ u64 *ptep;
- TEST_ASSERT((vaddr % vm->page_size) == 0,
+ TEST_ASSERT((gva % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
+ " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
+ TEST_ASSERT((gpa % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
- ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
break;
default:
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, gpa, pg_attr);
}
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
{
- uint64_t attr_idx = MT_NORMAL;
+ u64 attr_idx = MT_NORMAL;
- _virt_pg_map(vm, vaddr, paddr, attr_idx);
+ _virt_pg_map(vm, gva, gpa, attr_idx);
}
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level)
{
- uint64_t *ptep;
+ u64 *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 0)
+ return ptep;
- switch (vm->pgtable_levels) {
+ switch (vm->mmu.pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 1)
+ break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 2)
+ break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@@ -215,18 +224,23 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva)
+{
+ return virt_get_pte_hva_at_level(vm, gva, 3);
+}
+
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva)
{
- uint64_t *ptep = virt_get_pte_hva(vm, gva);
+ u64 *ptep = virt_get_pte_hva(vm, gva);
return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
}
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level)
{
#ifdef DEBUG
static const char * const type[] = { "", "pud", "pmd", "pte" };
- uint64_t pte, *ptep;
+ u64 pte, *ptep;
if (level == 4)
return;
@@ -241,15 +255,15 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
#endif
}
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
- int level = 4 - (vm->pgtable_levels - 1);
- uint64_t pgd, *ptep;
+ int level = 4 - (vm->mmu.pgtable_levels - 1);
+ u64 pgd, *ptep;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
+ for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -258,107 +272,133 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
+bool vm_supports_el2(struct kvm_vm *vm)
+{
+ const char *value = getenv("NV");
+
+ if (value && *value == '0')
+ return false;
+
+ return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic;
+}
+
+void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init)
+{
+ struct kvm_vcpu_init preferred = {};
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
+ if (vm_supports_el2(vm))
+ preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
+
+ *init = preferred;
+}
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
- uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
+ u64 sctlr_el1, tcr_el1, ttbr0_el1;
- if (!init)
+ if (!init) {
+ kvm_get_default_vcpu_target(vm, &default_init);
init = &default_init;
-
- if (init->target == -1) {
- struct kvm_vcpu_init preferred;
- vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
- init->target = preferred.target;
}
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
+ vcpu->init = *init;
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20);
- sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
- tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
+ sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));
+ tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1));
/* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+ ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift);
/* Configure output size */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
- ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ tcr_el1 |= TCR_IPS_52_BITS;
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
+ tcr_el1 |= TCR_TBI1;
+ tcr_el1 |= TCR_EPD1_MASK;
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
+
+ if (!vcpu_has_el2(vcpu))
+ return;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
+ HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H);
}
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent)
{
- uint64_t pstate, pc;
+ u64 pstate, pc;
pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
@@ -369,29 +409,29 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
{
- vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code);
}
-static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id,
struct kvm_vcpu_init *init)
{
size_t stack_size;
- uint64_t stack_vaddr;
+ gva_t stack_gva;
struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
- stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
+ stack_gva = __vm_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
aarch64_vcpu_setup(vcpu, init);
- vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_gva + stack_size);
return vcpu;
}
-struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id,
struct kvm_vcpu_init *init, void *guest_code)
{
struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
@@ -401,7 +441,7 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
return vcpu;
}
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
{
return __aarch64_vcpu_add(vm, vcpu_id, NULL);
}
@@ -418,13 +458,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
for (i = 0; i < num; i++) {
vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
+ va_arg(ap, u64));
}
va_end(ap);
}
-void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+void kvm_exit_unexpected_exception(int vector, u64 ec, bool valid_ec)
{
ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
while (1)
@@ -457,7 +497,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
extern char vectors;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+ vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (u64)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@@ -495,10 +535,10 @@ unexpected_exception:
void vm_init_descriptor_tables(struct kvm_vm *vm)
{
- vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
- vm->page_size, MEM_REGION_DATA);
+ vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size,
+ MEM_REGION_DATA);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+ *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers;
}
void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
@@ -522,13 +562,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
handlers->exception_handlers[vector][0] = handler;
}
-uint32_t guest_get_vcpuid(void)
+u32 guest_get_vcpuid(void)
{
return read_sysreg(tpidr_el1);
}
-static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
- uint32_t not_sup_val, uint32_t ipa52_min_val)
+static u32 max_ipa_for_page_size(u32 vm_ipa, u32 gran,
+ u32 not_sup_val, u32 ipa52_min_val)
{
if (gran == not_sup_val)
return 0;
@@ -538,16 +578,16 @@ static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
return min(vm_ipa, 48U);
}
-void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
- uint32_t *ipa16k, uint32_t *ipa64k)
+void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k,
+ u32 *ipa16k, u32 *ipa64k)
{
struct kvm_vcpu_init preferred_init;
int kvm_fd, vm_fd, vcpu_fd, err;
- uint64_t val;
- uint32_t gran;
+ u64 val;
+ u32 gran;
struct kvm_one_reg reg = {
.id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
- .addr = (uint64_t)&val,
+ .addr = (u64)&val,
};
kvm_fd = open_kvm_dev_path_or_exit();
@@ -565,15 +605,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val);
*ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val);
*ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
ID_AA64MMFR0_EL1_TGRAN64_IMP);
- gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val);
*ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
@@ -605,17 +645,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
-void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res)
+void smccc_hvc(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, struct arm_smccc_res *res)
{
__smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
arg6, res);
}
-void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
- uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
- uint64_t arg6, struct arm_smccc_res *res)
+void smccc_smc(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 arg3, u64 arg4, u64 arg5,
+ u64 arg6, struct arm_smccc_res *res)
{
__smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
arg6, res);
@@ -630,7 +670,7 @@ void kvm_selftest_arch_init(void)
guest_modes_append_default();
}
-void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+void vm_populate_gva_bitmap(struct kvm_vm *vm)
{
/*
* arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
@@ -645,3 +685,44 @@ void wfi(void)
{
asm volatile("wfi");
}
+
+static bool request_mte;
+static bool request_vgic = true;
+
+void test_wants_mte(void)
+{
+ request_mte = true;
+}
+
+void test_disable_default_vgic(void)
+{
+ request_vgic = false;
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE))
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+
+ if (request_vgic && kvm_supports_vgic_v3()) {
+ vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64);
+ vm->arch.has_gic = true;
+ }
+}
+
+void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ __vgic_v3_init(vm->arch.gic_fd);
+}
+
+void kvm_arch_vm_release(struct kvm_vm *vm)
+{
+ if (vm->arch.has_gic)
+ close(vm->arch.gic_fd);
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return request_vgic && kvm_supports_vgic_v3();
+}
diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
index ddab0ce89d4d..e0550ad5aa75 100644
--- a/tools/testing/selftests/kvm/lib/arm64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c
@@ -6,17 +6,17 @@
*/
#include "kvm_util.h"
-vm_vaddr_t *ucall_exit_mmio_addr;
+gva_t *ucall_exit_mmio_addr;
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa)
{
- vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+ gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
virt_map(vm, mmio_gva, mmio_gpa, 1);
vm->ucall_mmio_addr = mmio_gpa;
- write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+ write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva);
}
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
@@ -25,9 +25,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_MMIO &&
run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64),
"Unexpected ucall exit mmio address access");
- return (void *)(*((uint64_t *)run->mmio.data));
+ return (void *)(*((u64 *)run->mmio.data));
}
return NULL;
diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
index 4427f43f73ea..4ecebf3146a2 100644
--- a/tools/testing/selftests/kvm/lib/arm64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
@@ -15,6 +15,17 @@
#include "gic.h"
#include "gic_v3.h"
+bool kvm_supports_vgic_v3(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ kvm_vm_free(vm);
+
+ return !r;
+}
+
/*
* vGIC-v3 default host setup
*
@@ -30,24 +41,11 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
+int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs)
{
int gic_fd;
- uint64_t attr;
- struct list_head *iter;
- unsigned int nr_gic_pages, nr_vcpus_created = 0;
-
- TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
-
- /*
- * Make sure that the caller is infact calling this
- * function after all the vCPUs are added.
- */
- list_for_each(iter, &vm->vcpus)
- nr_vcpus_created++;
- TEST_ASSERT(nr_vcpus == nr_vcpus_created,
- "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
- nr_vcpus, nr_vcpus_created);
+ u64 attr;
+ unsigned int nr_gic_pages;
/* Distributor setup */
gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
@@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
- KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
-
attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
@@ -73,18 +68,47 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
- kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ return gic_fd;
+}
+
+void __vgic_v3_init(int fd)
+{
+ kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+}
- return gic_fd;
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs)
+{
+ unsigned int nr_vcpus_created = 0;
+ struct list_head *iter;
+ int fd;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs);
+ if (fd < 0)
+ return fd;
+
+ __vgic_v3_init(fd);
+ return fd;
}
/* should only work for level sensitive interrupts */
-int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level)
{
- uint64_t attr = 32 * (intid / 32);
- uint64_t index = intid % 32;
- uint64_t val;
+ u64 attr = 32 * (intid / 32);
+ u64 index = intid % 32;
+ u64 val;
int ret;
ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
@@ -98,16 +122,16 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
return ret;
}
-void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+void kvm_irq_set_level_info(int gic_fd, u32 intid, int level)
{
int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
}
-int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level)
{
- uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+ u32 irq = intid & KVM_ARM_IRQ_NUM_MASK;
TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
"doesn't allow injecting SGIs. There's no mask for it.");
@@ -120,23 +144,23 @@ int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
return _kvm_irq_line(vm, irq, level);
}
-void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level)
{
int ret = _kvm_arm_irq_line(vm, intid, level);
TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
-static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
- uint64_t reg_off)
+static void vgic_poke_irq(int gic_fd, u32 intid, struct kvm_vcpu *vcpu,
+ u64 reg_off)
{
- uint64_t reg = intid / 32;
- uint64_t index = intid % 32;
- uint64_t attr = reg_off + reg * 4;
- uint64_t val;
+ u64 reg = intid / 32;
+ u64 index = intid % 32;
+ u64 attr = reg_off + reg * 4;
+ u64 val;
bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
- uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ u32 group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
: KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
if (intid_is_private) {
@@ -159,12 +183,12 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
kvm_device_attr_set(gic_fd, group, attr, &val);
}
-void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
}
-void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index b49690658c60..8be0d09ecf0f 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -6,11 +6,14 @@
*/
#include "test_util.h"
-#include <execinfo.h>
+
#include <sys/syscall.h>
#include "kselftest.h"
+#ifdef __GLIBC__
+#include <execinfo.h>
+
/* Dumps the current stack trace to stderr. */
static void __attribute__((noinline)) test_dump_stack(void);
static void test_dump_stack(void)
@@ -57,6 +60,9 @@ static void test_dump_stack(void)
system(cmd);
#pragma GCC diagnostic pop
}
+#else
+static void test_dump_stack(void) {}
+#endif
static pid_t _gettid(void)
{
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index f34d926d9735..1924a9895834 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -7,7 +7,7 @@
#include "test_util.h"
-#include <bits/endian.h>
+#include <endian.h>
#include <linux/elf.h>
#include "kvm_util.h"
@@ -156,21 +156,20 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
"memsize of 0,\n"
" phdr index: %u p_memsz: 0x%" PRIx64,
- n1, (uint64_t) phdr.p_memsz);
- vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
- vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ n1, (u64)phdr.p_memsz);
+ gva_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
+ gva_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
- MEM_REGION_CODE);
- TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ gva_t gva = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE);
+ TEST_ASSERT(gva == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
" seg_vstart: 0x%lx\n"
- " vaddr: 0x%lx",
- n1, seg_vstart, vaddr);
- memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+ " gva: 0x%lx",
+ n1, seg_vstart, gva);
+ memset(addr_gva2hva(vm, gva), 0, seg_size);
/* TODO(lhuemill): Set permissions of each memory segment
* based on the least-significant 3 bits of phdr.p_flags.
*/
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index b04901e55138..7a96c43b5704 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,7 +4,7 @@
*/
#include "guest_modes.h"
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv)
#include "processor.h"
enum vm_guest_mode vm_mode_default;
#endif
@@ -13,12 +13,14 @@ struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
-#ifndef __aarch64__
+#if !defined(__aarch64__) && !defined(__riscv)
guest_mode_append(VM_MODE_DEFAULT, true);
-#else
+#endif
+
+#ifdef __aarch64__
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
- uint32_t ipa4k, ipa16k, ipa64k;
+ u32 ipa4k, ipa16k, ipa64k;
int i;
aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k);
@@ -74,11 +76,36 @@ void guest_modes_append_default(void)
#ifdef __riscv
{
unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+ unsigned long satp_mode = riscv64_get_satp_mode() << SATP_MODE_SHIFT;
+ int i;
- if (sz >= 52)
- guest_mode_append(VM_MODE_P52V48_4K, true);
- if (sz >= 48)
- guest_mode_append(VM_MODE_P48V48_4K, true);
+ switch (sz) {
+ case 59:
+ guest_mode_append(VM_MODE_P56V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P56V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P56V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 50:
+ guest_mode_append(VM_MODE_P50V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P50V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P50V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ case 41:
+ guest_mode_append(VM_MODE_P41V57_4K, satp_mode >= SATP_MODE_57);
+ guest_mode_append(VM_MODE_P41V48_4K, satp_mode >= SATP_MODE_48);
+ guest_mode_append(VM_MODE_P41V39_4K, satp_mode >= SATP_MODE_39);
+ break;
+ default:
+ break;
+ }
+
+ /* set the first supported mode as default */
+ vm_mode_default = NUM_VM_MODES;
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES, "No supported mode!");
}
#endif
}
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
index 74627514c4d4..7a33965349a7 100644
--- a/tools/testing/selftests/kvm/lib/guest_sprintf.c
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -35,8 +35,8 @@ static int skip_atoi(const char **s)
({ \
int __res; \
\
- __res = ((uint64_t) n) % (uint32_t) base; \
- n = ((uint64_t) n) / (uint32_t) base; \
+ __res = ((u64)n) % (u32)base; \
+ n = ((u64)n) / (u32)base; \
__res; \
})
@@ -119,7 +119,7 @@ int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
{
char *str, *end;
const char *s;
- uint64_t num;
+ u64 num;
int i, base;
int len;
@@ -216,7 +216,7 @@ repeat:
while (--field_width > 0)
APPEND_BUFFER_SAFE(str, end, ' ');
APPEND_BUFFER_SAFE(str, end,
- (uint8_t)va_arg(args, int));
+ (u8)va_arg(args, int));
while (--field_width > 0)
APPEND_BUFFER_SAFE(str, end, ' ');
continue;
@@ -240,7 +240,7 @@ repeat:
flags |= SPECIAL | SMALL | ZEROPAD;
}
str = number(str, end,
- (uint64_t)va_arg(args, void *), 16,
+ (u64)va_arg(args, void *), 16,
field_width, precision, flags);
continue;
@@ -284,15 +284,15 @@ repeat:
continue;
}
if (qualifier == 'l')
- num = va_arg(args, uint64_t);
+ num = va_arg(args, u64);
else if (qualifier == 'h') {
- num = (uint16_t)va_arg(args, int);
+ num = (u16)va_arg(args, int);
if (flags & SIGN)
- num = (int16_t)num;
+ num = (s16)num;
} else if (flags & SIGN)
num = va_arg(args, int);
else
- num = va_arg(args, uint32_t);
+ num = va_arg(args, u32);
str = number(str, end, num, base, field_width, precision, flags);
}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 33fefeb3ca44..e08967ef7b7b 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -5,13 +5,14 @@
* Copyright (C) 2018, Google LLC.
*/
#include "test_util.h"
+#include "kvm_syscalls.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
#include <assert.h>
#include <sched.h>
-#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -19,21 +20,33 @@
#define KVM_UTIL_MIN_PFN 2
-uint32_t guest_random_seed;
+u32 guest_random_seed;
struct guest_random_state guest_rng;
-static uint32_t last_guest_seed;
+static u32 last_guest_seed;
-static int vcpu_mmap_sz(void);
+static size_t vcpu_mmap_sz(void);
-int open_path_or_exit(const char *path, int flags)
+int __open_path_or_exit(const char *path, int flags, const char *enoent_help)
{
int fd;
fd = open(path, flags);
- __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
- TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+ if (fd < 0)
+ goto error;
return fd;
+
+error:
+ if (errno == EACCES || errno == ENOENT)
+ ksft_exit_skip("- Cannot open '%s': %s. %s\n",
+ path, strerror(errno),
+ errno == EACCES ? "Root required?" : enoent_help);
+ TEST_FAIL("Failed to open '%s'", path);
+}
+
+int open_path_or_exit(const char *path, int flags)
+{
+ return __open_path_or_exit(path, flags, "");
}
/*
@@ -47,7 +60,7 @@ int open_path_or_exit(const char *path, int flags)
*/
static int _open_kvm_dev_path_or_exit(int flags)
{
- return open_path_or_exit(KVM_DEV_PATH, flags);
+ return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?");
}
int open_kvm_dev_path_or_exit(void)
@@ -63,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param,
ssize_t bytes_read;
int fd, r;
+ /* Verify KVM is loaded, to provide a more helpful SKIP message. */
+ close(open_kvm_dev_path_or_exit());
+
r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
module_name, param);
TEST_ASSERT(r < path_size,
@@ -79,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param,
return bytes_read;
}
-static int get_module_param_integer(const char *module_name, const char *param)
+int kvm_get_module_param_integer(const char *module_name, const char *param)
{
/*
* 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
@@ -103,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param)
return atoi_paranoid(value);
}
-static bool get_module_param_bool(const char *module_name, const char *param)
+bool kvm_get_module_param_bool(const char *module_name, const char *param)
{
char value;
ssize_t r;
@@ -119,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param)
TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-bool get_kvm_param_bool(const char *param)
-{
- return get_module_param_bool("kvm", param);
-}
-
-bool get_kvm_intel_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_intel", param);
-}
-
-bool get_kvm_amd_param_bool(const char *param)
-{
- return get_module_param_bool("kvm_amd", param);
-}
-
-int get_kvm_param_integer(const char *param)
-{
- return get_module_param_integer("kvm", param);
-}
-
-int get_kvm_intel_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_intel", param);
-}
-
-int get_kvm_amd_param_integer(const char *param)
-{
- return get_module_param_integer("kvm_amd", param);
-}
-
/*
* Capability
*
@@ -179,7 +165,7 @@ unsigned int kvm_check_cap(long cap)
return (unsigned int)ret;
}
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size)
{
if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
@@ -196,9 +182,14 @@ static void vm_open(struct kvm_vm *vm)
vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
-const char *vm_guest_mode_string(uint32_t i)
+const char *vm_guest_mode_string(u32 i)
{
static const char * const strings[] = {
[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
@@ -210,13 +201,23 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
[VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
- [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ [VM_MODE_P56V57_4K] = "PA-bits:56, VA-bits:57, 4K pages",
+ [VM_MODE_P56V48_4K] = "PA-bits:56, VA-bits:48, 4K pages",
+ [VM_MODE_P56V39_4K] = "PA-bits:56, VA-bits:39, 4K pages",
+ [VM_MODE_P50V57_4K] = "PA-bits:50, VA-bits:57, 4K pages",
+ [VM_MODE_P50V48_4K] = "PA-bits:50, VA-bits:48, 4K pages",
+ [VM_MODE_P50V39_4K] = "PA-bits:50, VA-bits:39, 4K pages",
+ [VM_MODE_P41V57_4K] = "PA-bits:41, VA-bits:57, 4K pages",
+ [VM_MODE_P41V48_4K] = "PA-bits:41, VA-bits:48, 4K pages",
+ [VM_MODE_P41V39_4K] = "PA-bits:41, VA-bits:39, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -236,13 +237,23 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
[VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
- [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
+ [VM_MODE_P56V57_4K] = { 56, 57, 0x1000, 12 },
+ [VM_MODE_P56V48_4K] = { 56, 48, 0x1000, 12 },
+ [VM_MODE_P56V39_4K] = { 56, 39, 0x1000, 12 },
+ [VM_MODE_P50V57_4K] = { 50, 57, 0x1000, 12 },
+ [VM_MODE_P50V48_4K] = { 50, 48, 0x1000, 12 },
+ [VM_MODE_P50V39_4K] = { 50, 39, 0x1000, 12 },
+ [VM_MODE_P41V57_4K] = { 41, 57, 0x1000, 12 },
+ [VM_MODE_P41V48_4K] = { 41, 48, 0x1000, 12 },
+ [VM_MODE_P41V39_4K] = { 41, 39, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -256,7 +267,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
* based on the MSB of the VA. On architectures with this behavior
* the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
*/
-__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+__weak void vm_populate_gva_bitmap(struct kvm_vm *vm)
{
sparsebit_set_num(vm->vpages_valid,
0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
@@ -288,59 +299,77 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
/* Setup mode specific traits. */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P52V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P48V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P48V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
- vm->pgtable_levels = 4;
+ vm->mmu.pgtable_levels = 4;
break;
+ case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
- vm->pgtable_levels = 3;
+ vm->mmu.pgtable_levels = 3;
break;
- case VM_MODE_PXXV48_4K:
+ case VM_MODE_PXXVYY_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
kvm_init_vm_address_properties(vm);
- /*
- * Ignore KVM support for 5-level paging (vm->va_bits == 57),
- * it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this mode (48-bit virtual address space).
- */
- TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
- "Linear address width (%d bits) not supported",
- vm->va_bits);
+
pr_debug("Guest physical address width detected: %d\n",
vm->pa_bits);
- vm->pgtable_levels = 4;
- vm->va_bits = 48;
+ pr_debug("Guest virtual address width detected: %d\n",
+ vm->va_bits);
+
+ if (vm->va_bits == 57) {
+ vm->mmu.pgtable_levels = 5;
+ } else {
+ TEST_ASSERT(vm->va_bits == 48,
+ "Unexpected guest virtual address width: %d",
+ vm->va_bits);
+ vm->mmu.pgtable_levels = 4;
+ }
#else
- TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms");
#endif
break;
case VM_MODE_P47V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
break;
case VM_MODE_P44V64_4K:
- vm->pgtable_levels = 5;
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ vm->mmu.pgtable_levels = 5;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ vm->mmu.pgtable_levels = 4;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ vm->mmu.pgtable_levels = 3;
break;
default:
TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
@@ -356,7 +385,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
- vm_vaddr_populate_bitmap(vm);
+ vm_populate_gva_bitmap(vm);
/* Limit physical addresses to PA-bits. */
vm->max_gfn = vm_compute_max_gfn(vm);
@@ -367,12 +396,12 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
return vm;
}
-static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
- uint32_t nr_runnable_vcpus,
- uint64_t extra_mem_pages)
+static u64 vm_nr_pages_required(enum vm_guest_mode mode,
+ u32 nr_runnable_vcpus,
+ u64 extra_mem_pages)
{
- uint64_t page_size = vm_guest_mode_params[mode].page_size;
- uint64_t nr_pages;
+ u64 page_size = vm_guest_mode_params[mode].page_size;
+ u64 nr_pages;
TEST_ASSERT(nr_runnable_vcpus,
"Use vm_create_barebones() for VMs that _never_ have vCPUs");
@@ -406,21 +435,72 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
return vm_adjust_num_guest_pages(mode, nr_pages);
}
-struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
- uint64_t nr_extra_pages)
+void kvm_set_files_rlimit(u32 nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
+
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+}
+
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
+#ifdef __x86_64__
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
+#endif
+}
+
+struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus,
+ u64 nr_extra_pages)
{
- uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
+ u64 nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
nr_extra_pages);
struct userspace_mem_region *slot0;
struct kvm_vm *vm;
- int i;
+ int i, flags;
+
+ kvm_set_files_rlimit(nr_runnable_vcpus);
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
vm = ____vm_create(shape);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
for (i = 0; i < NR_MEM_REGIONS; i++)
vm->memslots[i] = 0;
@@ -442,7 +522,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
return vm;
}
@@ -466,8 +546,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
* extra_mem_pages is only used to calculate the maximum page table size,
* no real memory allocation for non-slot0 memory in this function.
*/
-struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
- uint64_t extra_mem_pages,
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus,
+ u64 extra_mem_pages,
void *guest_code, struct kvm_vcpu *vcpus[])
{
struct kvm_vm *vm;
@@ -480,12 +560,13 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
for (i = 0; i < nr_vcpus; ++i)
vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+ kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
+ u64 extra_mem_pages,
void *guest_code)
{
struct kvm_vcpu *vcpus[1];
@@ -533,7 +614,7 @@ void kvm_vm_restart(struct kvm_vm *vmp)
}
__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
- uint32_t vcpu_id)
+ u32 vcpu_id)
{
return __vm_vcpu_add(vm, vcpu_id);
}
@@ -545,20 +626,19 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
return vm_vcpu_recreate(vm, 0);
}
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+int __pin_task_to_cpu(pthread_t task, int cpu)
{
- cpu_set_t mask;
- int r;
+ cpu_set_t cpuset;
- CPU_ZERO(&mask);
- CPU_SET(pcpu, &mask);
- r = sched_setaffinity(0, sizeof(mask), &mask);
- TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset);
}
-static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+static u32 parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
{
- uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
+ u32 pcpu = atoi_non_negative("CPU number", cpu_str);
TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
"Not allowed to run on pCPU '%d', check cgroups?", pcpu);
@@ -582,7 +662,7 @@ void kvm_print_vcpu_pinning_help(void)
" (default: no pinning)\n", name, name);
}
-void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[],
int nr_vcpus)
{
cpu_set_t allowed_mask;
@@ -607,7 +687,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
/* 2. Check if the main worker needs to be pinned. */
if (cpu) {
- kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask));
cpu = strtok(NULL, delim);
}
@@ -635,15 +715,15 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
* region exists.
*/
static struct userspace_mem_region *
-userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
+userspace_mem_region_find(struct kvm_vm *vm, u64 start, u64 end)
{
struct rb_node *node;
for (node = vm->regions.gpa_tree.rb_node; node; ) {
struct userspace_mem_region *region =
container_of(node, struct userspace_mem_region, gpa_node);
- uint64_t existing_start = region->region.guest_phys_addr;
- uint64_t existing_end = region->region.guest_phys_addr
+ u64 existing_start = region->region.guest_phys_addr;
+ u64 existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
@@ -657,6 +737,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
return NULL;
}
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+ if (stats->fd < 0)
+ return;
+
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
+
+ kvm_close(stats->fd);
+ stats->fd = -1;
+}
+
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
@@ -676,19 +770,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
*/
static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
- int ret;
-
if (vcpu->dirty_gfns) {
- ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->run, vcpu_mmap_sz());
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(vcpu->run, vcpu_mmap_sz());
- ret = close(vcpu->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vcpu->fd);
+ kvm_stats_release(&vcpu->stats);
list_del(&vcpu->list);
@@ -699,35 +789,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
void kvm_vm_release(struct kvm_vm *vmp)
{
struct kvm_vcpu *vcpu, *tmp;
- int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
vm_vcpu_rm(vmp, vcpu);
- ret = close(vmp->fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_close(vmp->fd);
+ kvm_close(vmp->kvm_fd);
- ret = close(vmp->kvm_fd);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
+
+ kvm_arch_vm_release(vmp);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
struct userspace_mem_region *region)
{
- int ret;
-
rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
rb_erase(&region->hva_node, &vm->regions.hva_tree);
hash_del(&region->slot_node);
sparsebit_free(&region->unused_phy_pages);
sparsebit_free(&region->protected_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_start, region->mmap_size);
if (region->fd >= 0) {
/* There's an extra map when using shared memory. */
- ret = munmap(region->mmap_alias, region->mmap_size);
- TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ kvm_munmap(region->mmap_alias, region->mmap_size);
close(region->fd);
}
if (region->region.guest_memfd >= 0)
@@ -748,12 +835,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
- /* Free cached stats metadata and close FD */
- if (vmp->stats_fd) {
- free(vmp->stats_desc);
- close(vmp->stats_fd);
- }
-
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
@@ -771,7 +852,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
int kvm_memfd_alloc(size_t size, bool hugepages)
{
int memfd_flags = MFD_CLOEXEC;
- int fd, r;
+ int fd;
if (hugepages)
memfd_flags |= MFD_HUGETLB;
@@ -779,11 +860,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages)
fd = memfd_create("kvm_selftest", memfd_flags);
TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
- r = ftruncate(fd, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
-
- r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_ftruncate(fd, size);
+ kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
return fd;
}
@@ -840,8 +918,8 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
}
-int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva)
+int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva)
{
struct kvm_userspace_memory_region region = {
.slot = slot,
@@ -854,8 +932,8 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
}
-void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva)
+void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva)
{
int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
@@ -867,9 +945,9 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
__TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
"KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
-int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset)
+int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva,
+ u32 guest_memfd, u64 guest_memfd_offset)
{
struct kvm_userspace_memory_region2 region = {
.slot = slot,
@@ -886,9 +964,9 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
}
-void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset)
+void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags,
+ gpa_t gpa, u64 size, void *hva,
+ u32 guest_memfd, u64 guest_memfd_offset)
{
int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
guest_memfd, guest_memfd_offset);
@@ -900,14 +978,14 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags
/* FIXME: This thing needs to be ripped apart and rewritten. */
void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
+ gpa_t gpa, u32 slot, u64 npages, u32 flags,
+ int guest_memfd, u64 guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
size_t mem_size = npages * vm->page_size;
- size_t alignment;
+ size_t alignment = 1;
TEST_REQUIRE_SET_USER_MEMORY_REGION2();
@@ -915,32 +993,31 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
- TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
- " guest_paddr: 0x%lx vm->page_size: 0x%x",
- guest_paddr, vm->page_size);
- TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ " gpa: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->page_size);
+ TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1)
<= vm->max_gfn, "Physical range beyond maximum "
"supported physical address,\n"
- " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " gpa: 0x%lx npages: 0x%lx\n"
" vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- guest_paddr, npages, vm->max_gfn, vm->page_size);
+ gpa, npages, vm->max_gfn, vm->page_size);
/*
* Confirm a mem region with an overlapping address doesn't
* already exist.
*/
region = (struct userspace_mem_region *) userspace_mem_region_find(
- vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ vm, gpa, (gpa + npages * vm->page_size) - 1);
if (region != NULL)
TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
- " requested guest_paddr: 0x%lx npages: 0x%lx "
- "page_size: 0x%x\n"
- " existing guest_paddr: 0x%lx size: 0x%lx",
- guest_paddr, npages, vm->page_size,
- (uint64_t) region->region.guest_phys_addr,
- (uint64_t) region->region.memory_size);
+ " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n"
+ " existing gpa: 0x%lx size: 0x%lx",
+ gpa, npages, vm->page_size,
+ (u64)region->region.guest_phys_addr,
+ (u64)region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
@@ -950,12 +1027,11 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
TEST_FAIL("A mem region with the requested slot "
"already exists.\n"
- " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
- " existing slot: %u paddr: 0x%lx size: 0x%lx",
- slot, guest_paddr, npages,
- region->region.slot,
- (uint64_t) region->region.guest_phys_addr,
- (uint64_t) region->region.memory_size);
+ " requested slot: %u gpa: 0x%lx npages: 0x%lx\n"
+ " existing slot: %u gpa: 0x%lx size: 0x%lx",
+ slot, gpa, npages, region->region.slot,
+ (u64)region->region.guest_phys_addr,
+ (u64)region->region.memory_size);
}
/* Allocate and initialize new mem region structure. */
@@ -963,13 +1039,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
TEST_ASSERT(region != NULL, "Insufficient Memory");
region->mmap_size = mem_size;
-#ifdef __s390x__
- /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
- alignment = 0x100000;
-#else
- alignment = 1;
-#endif
-
/*
* When using THP mmap is not guaranteed to returned a hugepage aligned
* address so we have to pad the mmap. Padding is not needed for HugeTLB
@@ -979,7 +1048,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -990,12 +1059,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->fd = kvm_memfd_alloc(region->mmap_size,
src_type == VM_MEM_SRC_SHARED_HUGETLB);
- region->mmap_start = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_start != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
@@ -1019,7 +1085,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
if (flags & KVM_MEM_GUEST_MEMFD) {
if (guest_memfd < 0) {
- uint32_t guest_memfd_flags = 0;
+ u32 guest_memfd_flags = 0;
TEST_ASSERT(!guest_memfd_offset,
"Offset must be zero when creating new guest_memfd");
guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
@@ -1030,8 +1096,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
* needing to track if the fd is owned by the framework
* or by the caller.
*/
- guest_memfd = dup(guest_memfd);
- TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ guest_memfd = kvm_dup(guest_memfd);
}
region->region.guest_memfd = guest_memfd;
@@ -1043,20 +1108,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
region->unused_phy_pages = sparsebit_alloc();
if (vm_arch_has_protected_memory(vm))
region->protected_phy_pages = sparsebit_alloc();
- sparsebit_set_num(region->unused_phy_pages,
- guest_paddr >> vm->page_shift, npages);
+ sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages);
region->region.slot = slot;
region->region.flags = flags;
- region->region.guest_phys_addr = guest_paddr;
+ region->region.guest_phys_addr = gpa;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
- ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size,
+ " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d",
+ ret, errno, slot, flags, gpa, region->region.memory_size,
region->region.guest_memfd);
/* Add to quick lookup data structures */
@@ -1066,12 +1129,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
/* If shared memory, create an alias. */
if (region->fd >= 0) {
- region->mmap_alias = mmap(NULL, region->mmap_size,
- PROT_READ | PROT_WRITE,
- vm_mem_backing_src_alias(src_type)->flag,
- region->fd, 0);
- TEST_ASSERT(region->mmap_alias != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ region->mmap_alias = kvm_mmap(region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd);
/* Align host alias address */
region->host_alias = align_ptr_up(region->mmap_alias, alignment);
@@ -1080,10 +1141,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
void vm_userspace_mem_region_add(struct kvm_vm *vm,
enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot,
- uint64_t npages, uint32_t flags)
+ gpa_t gpa, u32 slot, u64 npages, u32 flags)
{
- vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
+ vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0);
}
/*
@@ -1102,7 +1162,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
* memory slot ID).
*/
struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot)
+memslot2region(struct kvm_vm *vm, u32 memslot)
{
struct userspace_mem_region *region;
@@ -1133,7 +1193,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
* Sets the flags of the memory region specified by the value of slot,
* to the values given by flags.
*/
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags)
{
int ret;
struct userspace_mem_region *region;
@@ -1149,6 +1209,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
+void vm_mem_region_reload(struct kvm_vm *vm, u32 slot)
+{
+ struct userspace_mem_region *region = memslot2region(vm, slot);
+ struct kvm_userspace_memory_region2 tmp = region->region;
+
+ tmp.memory_size = 0;
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+}
+
/*
* VM Memory Region Move
*
@@ -1163,7 +1233,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
*
* Change the gpa of a memory region.
*/
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
+void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa)
{
struct userspace_mem_region *region;
int ret;
@@ -1192,7 +1262,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*
* Delete a memory region.
*/
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+void vm_mem_region_delete(struct kvm_vm *vm, u32 slot)
{
struct userspace_mem_region *region = memslot2region(vm, slot);
@@ -1202,18 +1272,18 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
__vm_mem_region_delete(vm, region);
}
-void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
+void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size,
bool punch_hole)
{
const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
struct userspace_mem_region *region;
- uint64_t end = base + size;
- uint64_t gpa, len;
+ u64 end = base + size;
+ gpa_t gpa, len;
off_t fd_offset;
int ret;
for (gpa = base; gpa < end; gpa += len) {
- uint64_t offset;
+ u64 offset;
region = userspace_mem_region_find(vm, gpa, gpa);
TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
@@ -1221,7 +1291,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
offset = gpa - region->region.guest_phys_addr;
fd_offset = region->region.guest_memfd_offset + offset;
- len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
+ len = min_t(u64, end - gpa, region->region.memory_size - offset);
ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
@@ -1231,14 +1301,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
}
/* Returns the size of a vCPU's kvm_run structure. */
-static int vcpu_mmap_sz(void)
+static size_t vcpu_mmap_sz(void)
{
int dev_fd, ret;
dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
- TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run),
KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
@@ -1246,7 +1316,7 @@ static int vcpu_mmap_sz(void)
return ret;
}
-static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
+static bool vcpu_exists(struct kvm_vm *vm, u32 vcpu_id)
{
struct kvm_vcpu *vcpu;
@@ -1262,7 +1332,7 @@ static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
* Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
* No additional vCPU setup is done. Returns the vCPU.
*/
-struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
{
struct kvm_vcpu *vcpu;
@@ -1279,12 +1349,15 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
- "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->run));
- vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
- PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->run != MAP_FAILED,
- __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE,
+ MAP_SHARED, vcpu->fd);
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
@@ -1293,33 +1366,18 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
}
/*
- * VM Virtual Address Unused Gap
- *
- * Input Args:
- * vm - Virtual Machine
- * sz - Size (bytes)
- * vaddr_min - Minimum Virtual Address
- *
- * Output Args: None
- *
- * Return:
- * Lowest virtual address at or below vaddr_min, with at least
- * sz unused bytes. TEST_ASSERT failure if no area of at least
- * size sz is available.
- *
- * Within the VM specified by vm, locates the lowest starting virtual
- * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * Within the VM specified by @vm, locates the lowest starting guest virtual
+ * address >= @min_gva, that has at least @sz unallocated bytes. A
* TEST_ASSERT failure occurs for invalid input or no area of at least
- * sz unallocated bytes >= vaddr_min is available.
+ * @sz unallocated bytes >= @min_gva is available.
*/
-vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min)
+gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva)
{
- uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+ u64 pages = (sz + vm->page_size - 1) >> vm->page_shift;
/* Determine lowest permitted virtual page index. */
- uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
- if ((pgidx_start * vm->page_size) < vaddr_min)
+ u64 pgidx_start = (min_gva + vm->page_size - 1) >> vm->page_shift;
+ if ((pgidx_start * vm->page_size) < min_gva)
goto no_va_found;
/* Loop over section with enough valid virtual page indexes. */
@@ -1356,7 +1414,7 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
} while (pgidx_start != 0);
no_va_found:
- TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
+ TEST_FAIL("No gva of specified pages available, pages: 0x%lx", pages);
/* NOT REACHED */
return -1;
@@ -1378,148 +1436,91 @@ va_found:
return pgidx_start * vm->page_size;
}
-static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type,
- bool protected)
+static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva,
+ enum kvm_mem_region_type type, bool protected)
{
- uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+ u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
virt_pgd_alloc(vm);
- vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
- KVM_UTIL_MIN_PFN * vm->page_size,
- vm->memslots[type], protected);
+ gpa_t gpa = __vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size,
+ vm->memslots[type], protected);
/*
* Find an unused range of virtual page addresses of at least
* pages in length.
*/
- vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+ gva_t gva_start = vm_unused_gva_gap(vm, sz, min_gva);
/* Map the virtual pages. */
- for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size, paddr += vm->page_size) {
-
- virt_pg_map(vm, vaddr, paddr);
+ for (gva_t gva = gva_start; pages > 0;
+ pages--, gva += vm->page_size, gpa += vm->page_size) {
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+ virt_pg_map(vm, gva, gpa);
}
- return vaddr_start;
+ return gva_start;
}
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type)
+gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva,
+ enum kvm_mem_region_type type)
{
- return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
- vm_arch_has_protected_memory(vm));
+ return ____vm_alloc(vm, sz, min_gva, type,
+ vm_arch_has_protected_memory(vm));
}
-vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type)
+gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva,
+ enum kvm_mem_region_type type)
{
- return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+ return ____vm_alloc(vm, sz, min_gva, type, false);
}
/*
- * VM Virtual Address Allocate
- *
- * Input Args:
- * vm - Virtual Machine
- * sz - Size in bytes
- * vaddr_min - Minimum starting virtual address
- *
- * Output Args: None
- *
- * Return:
- * Starting guest virtual address
- *
- * Allocates at least sz bytes within the virtual address space of the vm
- * given by vm. The allocated bytes are mapped to a virtual address >=
- * the address given by vaddr_min. Note that each allocation uses a
- * a unique set of pages, with the minimum real allocation being at least
- * a page. The allocated physical space comes from the TEST_DATA memory region.
+ * Allocates at least sz bytes within the virtual address space of the VM
+ * given by @vm. The allocated bytes are mapped to a virtual address >= the
+ * address given by @min_gva. Note that each allocation uses a a unique set
+ * of pages, with the minimum real allocation being at least a page. The
+ * allocated physical space comes from the TEST_DATA memory region.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
+gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva)
{
- return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+ return __vm_alloc(vm, sz, min_gva, MEM_REGION_TEST_DATA);
}
-/*
- * VM Virtual Address Allocate Pages
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return:
- * Starting guest virtual address
- *
- * Allocates at least N system pages worth of bytes within the virtual address
- * space of the vm.
- */
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages)
{
- return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+ return vm_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
}
-vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
{
- return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+ return __vm_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
}
-/*
- * VM Virtual Address Allocate Page
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return:
- * Starting guest virtual address
- *
- * Allocates at least one system page worth of bytes within the virtual address
- * space of the vm.
- */
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+gva_t vm_alloc_page(struct kvm_vm *vm)
{
- return vm_vaddr_alloc_pages(vm, 1);
+ return vm_alloc_pages(vm, 1);
}
/*
- * Map a range of VM virtual address to the VM's physical address
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - Virtuall address to map
- * paddr - VM Physical Address
- * npages - The number of pages to map
+ * Map a range of VM virtual address to the VM's physical address.
*
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by @vm, creates a virtual translation for
- * @npages starting at @vaddr to the page range starting at @paddr.
+ * Within the VM given by @vm, creates a virtual translation for @npages
+ * starting at @gva to the page range starting at @gpa.
*/
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages)
+void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
- TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
- TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+ TEST_ASSERT(gva + size > gva, "Vaddr overflow");
+ TEST_ASSERT(gpa + size > gpa, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr);
- sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+ virt_pg_map(vm, gva, gpa);
- vaddr += page_size;
- paddr += page_size;
+ gva += page_size;
+ gpa += page_size;
}
}
@@ -1540,7 +1541,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
* address providing the memory to the vm physical address is returned.
* A TEST_ASSERT failure occurs if no region containing gpa exists.
*/
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa)
{
struct userspace_mem_region *region;
@@ -1573,7 +1574,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
* VM physical address is returned. A TEST_ASSERT failure occurs if no
* region containing hva exists.
*/
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
struct rb_node *node;
@@ -1584,7 +1585,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
if (hva >= region->host_mem) {
if (hva <= (region->host_mem
+ region->region.memory_size - 1))
- return (vm_paddr_t)((uintptr_t)
+ return (gpa_t)((uintptr_t)
region->region.guest_phys_addr
+ (hva - (uintptr_t)region->host_mem));
@@ -1616,7 +1617,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
* memory without mapping said memory in the guest's address space. And, for
* userfaultfd-based demand paging, to do so without triggering userfaults.
*/
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
+void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa)
{
struct userspace_mem_region *region;
uintptr_t offset;
@@ -1635,7 +1636,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
/* Create an interrupt controller chip for the specified VM. */
void vm_create_irqchip(struct kvm_vm *vm)
{
- vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ int r;
+
+ /*
+ * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+ * split model (x86 only) if that fails (KVM x86 allows compiling out
+ * support for KVM_CREATE_IRQCHIP).
+ */
+ r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+ if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+ vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+ else
+ TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
vm->has_irqchip = true;
}
@@ -1699,8 +1711,8 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
- uint32_t page_size = getpagesize();
- uint32_t size = vcpu->vm->dirty_ring_size;
+ u32 page_size = getpagesize();
+ u32 size = vcpu->vm->dirty_ring_size;
TEST_ASSERT(size > 0, "Should enable dirty ring first");
@@ -1715,9 +1727,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
- page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
- TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
+ addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
vcpu->dirty_gfns = addr;
vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
@@ -1730,7 +1741,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
* Device Ioctl
*/
-int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr)
{
struct kvm_device_attr attribute = {
.group = group,
@@ -1741,7 +1752,7 @@ int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}
-int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
+int __kvm_test_create_device(struct kvm_vm *vm, u64 type)
{
struct kvm_create_device create_dev = {
.type = type,
@@ -1751,7 +1762,7 @@ int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}
-int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
+int __kvm_create_device(struct kvm_vm *vm, u64 type)
{
struct kvm_create_device create_dev = {
.type = type,
@@ -1765,7 +1776,7 @@ int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
return err ? : create_dev.fd;
}
-int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
+int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val)
{
struct kvm_device_attr kvmattr = {
.group = group,
@@ -1777,7 +1788,7 @@ int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}
-int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
+int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val)
{
struct kvm_device_attr kvmattr = {
.group = group,
@@ -1793,7 +1804,7 @@ int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
* IRQ related functions.
*/
-int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level)
{
struct kvm_irq_level irq_level = {
.irq = irq,
@@ -1803,7 +1814,7 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}
-void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level)
{
int ret = _kvm_irq_line(vm, irq, level);
@@ -1825,7 +1836,7 @@ struct kvm_irq_routing *kvm_gsi_routing_create(void)
}
void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
- uint32_t gsi, uint32_t pin)
+ u32 gsi, u32 pin)
{
int i;
@@ -1875,7 +1886,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
* Dumps the current state of the VM given by vm, to the FILE stream
* given by stream.
*/
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
int ctr;
struct userspace_mem_region *region;
@@ -1888,8 +1899,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
- (uint64_t) region->region.guest_phys_addr,
- (uint64_t) region->region.memory_size,
+ (u64)region->region.guest_phys_addr,
+ (u64)region->region.memory_size,
region->host_mem);
fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
sparsebit_dump(stream, region->unused_phy_pages, 0);
@@ -1901,8 +1912,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
fprintf(stream, "%*spgd_created: %u\n", indent, "",
- vm->pgd_created);
- if (vm->pgd_created) {
+ vm->mmu.pgd_created);
+ if (vm->mmu.pgd_created) {
fprintf(stream, "%*sVirtual Translation Tables:\n",
indent + 2, "");
virt_dump(stream, vm, indent + 4);
@@ -1958,9 +1969,9 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
+ KVM_EXIT_STRING(ARM_SEA),
};
/*
@@ -1996,7 +2007,7 @@ const char *exit_reason_str(unsigned int exit_reason)
* Input Args:
* vm - Virtual Machine
* num - number of pages
- * paddr_min - Physical address minimum
+ * min_gpa - Physical address minimum
* memslot - Memory region to allocate page from
* protected - True if the pages will be used as protected/private memory
*
@@ -2006,29 +2017,29 @@ const char *exit_reason_str(unsigned int exit_reason)
* Starting physical address
*
* Within the VM specified by vm, locates a range of available physical
- * pages at or above paddr_min. If found, the pages are marked as in use
+ * pages at or above min_gpa. If found, the pages are marked as in use
* and their base address is returned. A TEST_ASSERT failure occurs if
- * not enough pages are available at or above paddr_min.
+ * not enough pages are available at or above min_gpa.
*/
-vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot,
- bool protected)
+gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ gpa_t min_gpa, u32 memslot,
+ bool protected)
{
struct userspace_mem_region *region;
sparsebit_idx_t pg, base;
TEST_ASSERT(num > 0, "Must allocate at least one page");
- TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+ TEST_ASSERT((min_gpa % vm->page_size) == 0, "Min physical address "
"not divisible by page size.\n"
- " paddr_min: 0x%lx page_size: 0x%x",
- paddr_min, vm->page_size);
+ " min_gpa: 0x%lx page_size: 0x%x",
+ min_gpa, vm->page_size);
region = memslot2region(vm, memslot);
TEST_ASSERT(!protected || region->protected_phy_pages,
"Region doesn't support protected memory");
- base = pg = paddr_min >> vm->page_shift;
+ base = pg = min_gpa >> vm->page_shift;
do {
for (; pg < base + num; ++pg) {
if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -2040,8 +2051,8 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
if (pg == 0) {
fprintf(stderr, "No guest physical page available, "
- "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
- paddr_min, vm->page_size, memslot);
+ "min_gpa: 0x%lx page_size: 0x%x memslot: %u\n",
+ min_gpa, vm->page_size, memslot);
fputs("---- vm dump ----\n", stderr);
vm_dump(stderr, vm, 2);
abort();
@@ -2056,13 +2067,12 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
return base * vm->page_size;
}
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot)
+gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot)
{
- return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
+ return vm_phy_pages_alloc(vm, 1, min_gpa, memslot);
}
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+gpa_t vm_alloc_page_table(struct kvm_vm *vm)
{
return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
vm->memslots[MEM_REGION_PT]);
@@ -2080,7 +2090,7 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
* Return:
* Equivalent host virtual address
*/
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+void *addr_gva2hva(struct kvm_vm *vm, gva_t gva)
{
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
@@ -2178,7 +2188,7 @@ struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
* Read the data values of a specified stat from the binary stats interface.
*/
void read_stat_data(int stats_fd, struct kvm_stats_header *header,
- struct kvm_stats_desc *desc, uint64_t *data,
+ struct kvm_stats_desc *desc, u64 *data,
size_t max_elements)
{
size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
@@ -2198,49 +2208,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
desc->name, size, ret);
}
-/*
- * Read the data of the named stat
- *
- * Input Args:
- * vm - the VM for which the stat should be read
- * stat_name - the name of the stat to read
- * max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- * data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ u64 *data, size_t max_elements)
{
struct kvm_stats_desc *desc;
size_t size_desc;
int i;
- if (!vm->stats_fd) {
- vm->stats_fd = vm_get_stats_fd(vm);
- read_stats_header(vm->stats_fd, &vm->stats_header);
- vm->stats_desc = read_stats_descriptors(vm->stats_fd,
- &vm->stats_header);
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
}
- size_desc = get_stats_descriptor_size(&vm->stats_header);
+ size_desc = get_stats_descriptor_size(&stats->header);
- for (i = 0; i < vm->stats_header.num_desc; ++i) {
- desc = (void *)vm->stats_desc + (i * size_desc);
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
- if (strcmp(desc->name, stat_name))
+ if (strcmp(desc->name, name))
continue;
- read_stat_data(vm->stats_fd, &vm->stats_header, desc,
- data, max_elements);
-
- break;
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
}
+
+ TEST_FAIL("Unable to find stat '%s'", name);
+}
+
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
}
-__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}
@@ -2248,18 +2251,42 @@ __weak void kvm_selftest_arch_init(void)
{
}
+static void report_unexpected_signal(int signum)
+{
+#define KVM_CASE_SIGNUM(sig) \
+ case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum)
+
+ switch (signum) {
+ KVM_CASE_SIGNUM(SIGBUS);
+ KVM_CASE_SIGNUM(SIGSEGV);
+ KVM_CASE_SIGNUM(SIGILL);
+ KVM_CASE_SIGNUM(SIGFPE);
+ default:
+ TEST_FAIL("Unexpected signal %d\n", signum);
+ }
+}
+
void __attribute((constructor)) kvm_selftest_init(void)
{
+ struct sigaction sig_sa = {
+ .sa_handler = report_unexpected_signal,
+ };
+
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ sigaction(SIGBUS, &sig_sa, NULL);
+ sigaction(SIGSEGV, &sig_sa, NULL);
+ sigaction(SIGILL, &sig_sa, NULL);
+ sigaction(SIGFPE, &sig_sa, NULL);
+
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
kvm_selftest_arch_init();
}
-bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa)
{
sparsebit_idx_t pg = 0;
struct userspace_mem_region *region;
@@ -2267,9 +2294,14 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
if (!vm_arch_has_protected_memory(vm))
return false;
- region = userspace_mem_region_find(vm, paddr, paddr);
- TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ TEST_ASSERT(region, "No vm physical memory at 0x%lx", gpa);
- pg = paddr >> vm->page_shift;
+ pg = gpa >> vm->page_shift;
return sparsebit_is_set(region->protected_phy_pages, pg);
}
+
+__weak bool kvm_arch_has_default_irqchip(void)
+{
+ return false;
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..3f1e4b67c5ae
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_PRMD
+ st.d t0, sp, PRMD_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ ld.d t0, sp, PC_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_ERA
+ ld.d t0, sp, PRMD_OFFSET_EXREGS
+ csrwr t0, LOONGARCH_CSR_PRMD
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..64d91fb76522
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include <asm/kvm.h>
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static gpa_t invalid_pgtable[4];
+static gva_t exception_handlers;
+
+static u64 virt_pte_index(struct kvm_vm *vm, gva_t gva, int level)
+{
+ unsigned int shift;
+ u64 mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static u64 pte_addr(struct kvm_vm *vm, u64 entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static u64 ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, gpa_t table, gpa_t child)
+{
+ u64 *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ gpa_t child, table;
+
+ if (vm->mmu.pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->mmu.pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->mmu.pgd = table;
+ vm->mmu.pgd_created = true;
+}
+
+static int virt_pte_none(u64 *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static u64 *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc)
+{
+ int level;
+ u64 *ptep;
+ gpa_t child;
+
+ if (!vm->mmu.pgd_created)
+ goto unmapped_gva;
+
+ child = vm->mmu.pgd;
+ level = vm->mmu.pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva)
+{
+ u64 *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address gva: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
+{
+ u32 prot_bits;
+ u64 *ptep;
+
+ TEST_ASSERT((gva % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
+ TEST_ASSERT((gpa % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, gva, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, gpa | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level)
+{
+ u64 pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
+{
+ int level;
+
+ if (!vm->mmu.pgd_created)
+ return;
+
+ level = vm->mmu.pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->mmu.pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ int vector;
+ unsigned long pc, estat, badv;
+ struct handlers *handlers;
+
+ handlers = (struct handlers *)exception_handlers;
+ vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+ if (handlers && handlers->exception_handlers[vector])
+ return handlers->exception_handlers[vector](regs);
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ void *addr;
+
+ vm->handlers = __vm_alloc(vm, sizeof(struct handlers),
+ LOONGARCH_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ addr = addr_gva2hva(vm, vm->handlers);
+ memset(addr, 0, vm->page_size);
+ exception_handlers = vm->handlers;
+ sync_global_to_guest(vm, exception_handlers);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector] = handler;
+}
+
+u32 guest_get_vcpuid(void)
+{
+ return csr_read(LOONGARCH_CSR_CPUID);
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, u64);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val)
+{
+ __vcpu_set_reg(vcpu, id, val);
+}
+
+static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, u64 id, u64 val)
+{
+ u64 cfgid;
+
+ cfgid = KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, cfgid, val);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, u64 id, void *addr)
+{
+ u64 csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, u64 id, u64 val)
+{
+ u64 csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned int cfg;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ cfg = read_cpucfg(LOONGARCH_CPUCFG6);
+ loongarch_set_cpucfg(vcpu, LOONGARCH_CPUCFG6, cfg);
+
+ /* kernel mode and page enable mode */
+ val = PLV_KERN | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ /* time count start from 0 */
+ val = 0;
+ loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val);
+
+ width = vm->page_shift - 3;
+
+ switch (vm->mmu.pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_alloc(vm, vm->page_size, LOONGARCH_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
+{
+ size_t stack_size;
+ u64 stack_gva;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_gva = __vm_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_gva != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_gva + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (u64)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..cd49a3440ead
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+gva_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa)
+{
+ gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((u64 *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+ TEST_ASSERT(false, "found malformed line after 'memcg ...',"
+ "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * If lru_gen aging should force page table scanning.
+ *
+ * If you want to set this to false, you will need to do eviction
+ * before doing extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+ size_t sz;
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find which generation contains at least @pages pages, assuming that
+ * such a generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 313277486a1d..6dcd15910a06 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -16,7 +16,7 @@ struct memstress_args memstress_args;
* Guest virtual memory offset of the testing memory slot.
* Must not conflict with identity mapped test code.
*/
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
struct vcpu_thread {
/* The index of the vCPU. */
@@ -44,15 +44,15 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
-void memstress_guest_code(uint32_t vcpu_idx)
+void memstress_guest_code(u32 vcpu_idx)
{
struct memstress_args *args = &memstress_args;
struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
struct guest_random_state rand_state;
- uint64_t gva;
- uint64_t pages;
- uint64_t addr;
- uint64_t page;
+ gva_t gva;
+ u64 pages;
+ u64 addr;
+ u64 page;
int i;
rand_state = new_guest_random_state(guest_random_seed + vcpu_idx);
@@ -76,9 +76,9 @@ void memstress_guest_code(uint32_t vcpu_idx)
addr = gva + (page * args->guest_page_size);
if (__guest_random_bool(&rand_state, args->write_percent))
- *(uint64_t *)addr = 0x0123456789ABCDEF;
+ *(u64 *)addr = 0x0123456789ABCDEF;
else
- READ_ONCE(*(uint64_t *)addr);
+ READ_ONCE(*(u64 *)addr);
}
GUEST_SYNC(1);
@@ -87,7 +87,7 @@ void memstress_guest_code(uint32_t vcpu_idx)
void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
struct kvm_vcpu *vcpus[],
- uint64_t vcpu_memory_bytes,
+ u64 vcpu_memory_bytes,
bool partition_vcpu_memory_access)
{
struct memstress_args *args = &memstress_args;
@@ -122,15 +122,15 @@ void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
}
struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
- uint64_t vcpu_memory_bytes, int slots,
+ u64 vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src,
bool partition_vcpu_memory_access)
{
struct memstress_args *args = &memstress_args;
struct kvm_vm *vm;
- uint64_t guest_num_pages, slot0_pages = 0;
- uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
- uint64_t region_end_gfn;
+ u64 guest_num_pages, slot0_pages = 0;
+ u64 backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ u64 region_end_gfn;
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -196,18 +196,14 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
args->gpa = align_down(args->gpa, backing_src_pagesz);
-#ifdef __s390x__
- /* Align to 1M (segment size) */
- args->gpa = align_down(args->gpa, 1 << 20);
-#endif
args->size = guest_num_pages * args->guest_page_size;
pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
args->gpa, args->gpa + args->size);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
- uint64_t region_pages = guest_num_pages / slots;
- vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
+ u64 region_pages = guest_num_pages / slots;
+ gpa_t region_start = args->gpa + region_pages * args->guest_page_size * i;
vm_userspace_mem_region_add(vm, backing_src, region_start,
MEMSTRESS_MEM_SLOT_INDEX + i,
@@ -236,7 +232,7 @@ void memstress_destroy_vm(struct kvm_vm *vm)
kvm_vm_free(vm);
}
-void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
+void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent)
{
memstress_args.write_percent = write_percent;
sync_global_to_guest(vm, memstress_args.write_percent);
@@ -248,7 +244,7 @@ void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
sync_global_to_guest(vm, memstress_args.random_access);
}
-uint64_t __weak memstress_nested_pages(int nr_vcpus)
+u64 __weak memstress_nested_pages(int nr_vcpus)
{
return 0;
}
@@ -265,7 +261,7 @@ static void *vcpu_thread_main(void *data)
int vcpu_idx = vcpu->vcpu_idx;
if (memstress_args.pin_vcpus)
- kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+ pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
WRITE_ONCE(vcpu->running, true);
@@ -353,7 +349,7 @@ void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int sl
}
void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
- int slots, uint64_t pages_per_slot)
+ int slots, u64 pages_per_slot)
{
int i;
@@ -364,7 +360,7 @@ void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
}
}
-unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot)
{
unsigned long **bitmaps;
int i;
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
index aa0abd3f35bb..b787b982e922 100644
--- a/tools/testing/selftests/kvm/lib/riscv/handlers.S
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -10,85 +10,88 @@
#include <asm/csr.h>
.macro save_context
- addi sp, sp, (-8*34)
- sd x1, 0(sp)
- sd x2, 8(sp)
- sd x3, 16(sp)
- sd x4, 24(sp)
- sd x5, 32(sp)
- sd x6, 40(sp)
- sd x7, 48(sp)
- sd x8, 56(sp)
- sd x9, 64(sp)
- sd x10, 72(sp)
- sd x11, 80(sp)
- sd x12, 88(sp)
- sd x13, 96(sp)
- sd x14, 104(sp)
- sd x15, 112(sp)
- sd x16, 120(sp)
- sd x17, 128(sp)
- sd x18, 136(sp)
- sd x19, 144(sp)
- sd x20, 152(sp)
- sd x21, 160(sp)
- sd x22, 168(sp)
- sd x23, 176(sp)
- sd x24, 184(sp)
- sd x25, 192(sp)
- sd x26, 200(sp)
- sd x27, 208(sp)
- sd x28, 216(sp)
- sd x29, 224(sp)
- sd x30, 232(sp)
- sd x31, 240(sp)
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
csrr s0, CSR_SEPC
csrr s1, CSR_SSTATUS
- csrr s2, CSR_SCAUSE
- sd s0, 248(sp)
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
sd s1, 256(sp)
sd s2, 264(sp)
+ sd s3, 272(sp)
.endm
.macro restore_context
+ ld s3, 272(sp)
ld s2, 264(sp)
ld s1, 256(sp)
- ld s0, 248(sp)
- csrw CSR_SCAUSE, s2
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
csrw CSR_SSTATUS, s1
csrw CSR_SEPC, s0
- ld x31, 240(sp)
- ld x30, 232(sp)
- ld x29, 224(sp)
- ld x28, 216(sp)
- ld x27, 208(sp)
- ld x26, 200(sp)
- ld x25, 192(sp)
- ld x24, 184(sp)
- ld x23, 176(sp)
- ld x22, 168(sp)
- ld x21, 160(sp)
- ld x20, 152(sp)
- ld x19, 144(sp)
- ld x18, 136(sp)
- ld x17, 128(sp)
- ld x16, 120(sp)
- ld x15, 112(sp)
- ld x14, 104(sp)
- ld x13, 96(sp)
- ld x12, 88(sp)
- ld x11, 80(sp)
- ld x10, 72(sp)
- ld x9, 64(sp)
- ld x8, 56(sp)
- ld x7, 48(sp)
- ld x6, 40(sp)
- ld x5, 32(sp)
- ld x4, 24(sp)
- ld x3, 16(sp)
- ld x2, 8(sp)
- ld x1, 0(sp)
- addi sp, sp, (8*34)
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
.endm
.balign 4
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index dd663bcf0cc0..ded5429f3448 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -8,15 +8,16 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
-static vm_vaddr_t exception_handlers;
+static gva_t exception_handlers;
-bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext)
{
unsigned long value = 0;
int ret;
@@ -26,41 +27,36 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
return !ret && !!value;
}
-static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
-{
- return (v + vm->page_size) & ~(vm->page_size - 1);
-}
-
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static u64 pte_addr(struct kvm_vm *vm, u64 entry)
{
return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
PGTBL_PAGE_SIZE_SHIFT;
}
-static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+static u64 ptrs_per_pte(struct kvm_vm *vm)
{
- return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+ return PGTBL_PAGE_SIZE / sizeof(u64);
}
-static uint64_t pte_index_mask[] = {
+static u64 pte_index_mask[] = {
PGTBL_L0_INDEX_MASK,
PGTBL_L1_INDEX_MASK,
PGTBL_L2_INDEX_MASK,
PGTBL_L3_INDEX_MASK,
};
-static uint32_t pte_index_shift[] = {
+static u32 pte_index_shift[] = {
PGTBL_L0_INDEX_SHIFT,
PGTBL_L1_INDEX_SHIFT,
PGTBL_L2_INDEX_SHIFT,
PGTBL_L3_INDEX_SHIFT,
};
-static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+static u64 pte_index(struct kvm_vm *vm, gva_t gva, int level)
{
TEST_ASSERT(level > -1,
"Negative page table level (%d) not possible", level);
- TEST_ASSERT(level < vm->pgtable_levels,
+ TEST_ASSERT(level < vm->mmu.pgtable_levels,
"Invalid page table level (%d)", level);
return (gva & pte_index_mask[level]) >> pte_index_shift[level];
@@ -68,37 +64,36 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+ size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR,
- vm->memslots[MEM_REGION_PT]);
- vm->pgd_created = true;
+ vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->mmu.pgd_created = true;
}
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
{
- uint64_t *ptep, next_ppn;
- int level = vm->pgtable_levels - 1;
+ u64 *ptep, next_ppn;
+ int level = vm->mmu.pgtable_levels - 1;
- TEST_ASSERT((vaddr % vm->page_size) == 0,
+ TEST_ASSERT((gva % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
+ TEST_ASSERT((gpa % vm->page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
+ " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->max_gfn, vm->page_size);
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8;
if (!*ptep) {
next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
*ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
@@ -108,7 +103,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
while (level > -1) {
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
- pte_index(vm, vaddr, level) * 8;
+ pte_index(vm, gva, level) * 8;
if (!*ptep && level > 0) {
next_ppn = vm_alloc_page_table(vm) >>
PGTBL_PAGE_SIZE_SHIFT;
@@ -118,20 +113,20 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
level--;
}
- paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
- *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ gpa = gpa >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (gpa << PGTBL_PTE_ADDR_SHIFT) |
PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
}
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva)
{
- uint64_t *ptep;
- int level = vm->pgtable_levels - 1;
+ u64 *ptep;
+ int level = vm->mmu.pgtable_levels - 1;
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
goto unmapped_gva;
- ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8;
if (!ptep)
goto unmapped_gva;
level--;
@@ -152,12 +147,12 @@ unmapped_gva:
exit(1);
}
-static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
- uint64_t page, int level)
+static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent,
+ u64 page, int level)
{
#ifdef DEBUG
static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
- uint64_t pte, *ptep;
+ u64 pte, *ptep;
if (level < 0)
return;
@@ -174,15 +169,16 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
#endif
}
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
- int level = vm->pgtable_levels - 1;
- uint64_t pgd, *ptep;
+ struct kvm_mmu *mmu = &vm->mmu;
+ int level = mmu->pgtable_levels - 1;
+ u64 pgd, *ptep;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
- for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
@@ -197,27 +193,46 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
{
struct kvm_vm *vm = vcpu->vm;
unsigned long satp;
+ unsigned long satp_mode;
+ unsigned long max_satp_mode;
/*
* The RISC-V Sv48 MMU mode supports 56-bit physical address
* for 48-bit virtual address with 4KB last level page size.
*/
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- case VM_MODE_P48V48_4K:
- case VM_MODE_P40V48_4K:
+ case VM_MODE_P56V57_4K:
+ case VM_MODE_P50V57_4K:
+ case VM_MODE_P41V57_4K:
+ satp_mode = SATP_MODE_57;
+ break;
+ case VM_MODE_P56V48_4K:
+ case VM_MODE_P50V48_4K:
+ case VM_MODE_P41V48_4K:
+ satp_mode = SATP_MODE_48;
+ break;
+ case VM_MODE_P56V39_4K:
+ case VM_MODE_P50V39_4K:
+ case VM_MODE_P41V39_4K:
+ satp_mode = SATP_MODE_39;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
- satp |= SATP_MODE_48;
+ max_satp_mode = vcpu_get_reg(vcpu, RISCV_CONFIG_REG(satp_mode));
+
+ if ((satp_mode >> SATP_MODE_SHIFT) > max_satp_mode)
+ TEST_FAIL("Unable to set satp mode 0x%lx, max mode 0x%lx\n",
+ satp_mode >> SATP_MODE_SHIFT, max_satp_mode);
+
+ satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= satp_mode;
vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
}
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent)
{
struct kvm_riscv_core core;
@@ -295,20 +310,20 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
}
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
{
int r;
size_t stack_size;
- unsigned long stack_vaddr;
+ unsigned long stack_gva;
unsigned long current_gp = 0;
struct kvm_mp_state mps;
struct kvm_vcpu *vcpu;
stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
- stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
- DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
+ stack_gva = __vm_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
vcpu = __vm_vcpu_add(vm, vcpu_id);
riscv_vcpu_mmu_setup(vcpu);
@@ -328,7 +343,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
/* Setup stack pointer and program counter of guest */
- vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_gva + stack_size);
/* Setup sscratch for guest_get_vcpuid() */
vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
@@ -342,7 +357,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
- uint64_t id = RISCV_CORE_REG(regs.a0);
+ u64 id = RISCV_CORE_REG(regs.a0);
int i;
TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
@@ -377,7 +392,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
id = RISCV_CORE_REG(regs.a7);
break;
}
- vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
+ vcpu_set_reg(vcpu, id, va_arg(ap, u64));
}
va_end(ap);
@@ -402,7 +417,7 @@ struct handlers {
exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
-void route_exception(struct ex_regs *regs)
+void route_exception(struct pt_regs *regs)
{
struct handlers *handlers = (struct handlers *)exception_handlers;
int vector = 0, ec;
@@ -433,10 +448,10 @@ void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
void vm_init_vector_tables(struct kvm_vm *vm)
{
- vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
- vm->page_size, MEM_REGION_DATA);
+ vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size,
+ MEM_REGION_DATA);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+ *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers;
}
void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
@@ -454,7 +469,7 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
handlers->exception_handlers[1][0] = handler;
}
-uint32_t guest_get_vcpuid(void)
+u32 guest_get_vcpuid(void)
{
return csr_read(CSR_SSCRATCH);
}
@@ -515,3 +530,43 @@ unsigned long get_host_sbi_spec_version(void)
return ret.value;
}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * riscv64 doesn't have a true default mode, so start by detecting the
+ * supported vm mode.
+ */
+ guest_modes_append_default();
+}
+
+unsigned long riscv64_get_satp_mode(void)
+{
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ u64 val;
+ struct kvm_one_reg reg = {
+ .id = RISCV_CONFIG_REG(satp_mode),
+ .addr = (u64)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+
+ return val;
+}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return kvm_check_cap(KVM_CAP_IRQCHIP);
+}
diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
index 2c432fa164f1..f5480473f192 100644
--- a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
@@ -13,7 +13,7 @@
static void guest_code(void)
{
- uint64_t diag318_info = 0x12345678;
+ u64 diag318_info = 0x12345678;
asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
}
@@ -23,13 +23,13 @@ static void guest_code(void)
* we create an ad-hoc VM here to handle the instruction then extract the
* necessary data. It is up to the caller to decide what to do with that data.
*/
-static uint64_t diag318_handler(void)
+static u64 diag318_handler(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
- uint64_t reg;
- uint64_t diag318_info;
+ u64 reg;
+ u64 diag318_info;
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_run(vcpu);
@@ -51,9 +51,9 @@ static uint64_t diag318_handler(void)
return diag318_info;
}
-uint64_t get_diag318_info(void)
+u64 get_diag318_info(void)
{
- static uint64_t diag318_info;
+ static u64 diag318_info;
static bool printed_skip;
/*
diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
index d540812d911a..9a778054f07f 100644
--- a/tools/testing/selftests/kvm/lib/s390/facility.c
+++ b/tools/testing/selftests/kvm/lib/s390/facility.c
@@ -10,5 +10,5 @@
#include "facility.h"
-uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS];
+u64 stfl_doublewords[NB_STFL_DOUBLEWORDS];
bool stfle_flag;
diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..a9adb3782b35 100644
--- a/tools/testing/selftests/kvm/lib/s390/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
@@ -12,21 +12,21 @@
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- vm_paddr_t paddr;
+ gpa_t gpa;
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- if (vm->pgd_created)
+ if (vm->mmu.pgd_created)
return;
- paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
+ gpa = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
KVM_GUEST_PAGE_TABLE_MIN_PADDR,
vm->memslots[MEM_REGION_PT]);
- memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
+ memset(addr_gpa2hva(vm, gpa), 0xff, PAGES_PER_REGION * vm->page_size);
- vm->pgd = paddr;
- vm->pgd_created = true;
+ vm->mmu.pgd = gpa;
+ vm->mmu.pgd_created = true;
}
/*
@@ -34,9 +34,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
+static u64 virt_alloc_region(struct kvm_vm *vm, int ri)
{
- uint64_t taddr;
+ u64 taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
@@ -47,30 +47,28 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
{
int ri, idx;
- uint64_t *entry;
+ u64 *entry;
TEST_ASSERT((gva % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- gva, vm->page_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (gva >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx",
- gva);
+ "Virtual address not on page boundary,\n"
+ " gva: 0x%lx vm->page_size: 0x%x",
+ gva, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
TEST_ASSERT((gpa % vm->page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
+ " gpa: 0x%lx vm->page_size: 0x%x",
gva, vm->page_size);
TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
gva, vm->max_gfn, vm->page_size);
/* Walk through region and segment tables */
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
@@ -86,15 +84,15 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
entry[idx] = gpa;
}
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva)
{
int ri, idx;
- uint64_t *entry;
+ u64 *entry;
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- entry = addr_gpa2hva(vm, vm->pgd);
+ entry = addr_gpa2hva(vm, vm->mmu.pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
@@ -111,10 +109,10 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
return (entry[idx] & ~0xffful) + (gva & 0xffful);
}
-static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
- uint64_t ptea_start)
+static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, u8 indent,
+ u64 ptea_start)
{
- uint64_t *pte, ptea;
+ u64 *pte, ptea;
for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
pte = addr_gpa2hva(vm, ptea);
@@ -125,10 +123,10 @@ static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
- uint64_t reg_tab_addr)
+static void virt_dump_region(FILE *stream, struct kvm_vm *vm, u8 indent,
+ u64 reg_tab_addr)
{
- uint64_t addr, *entry;
+ u64 addr, *entry;
for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) {
entry = addr_gpa2hva(vm, addr);
@@ -147,12 +145,12 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
- if (!vm->pgd_created)
+ if (!vm->mmu.pgd_created)
return;
- virt_dump_region(stream, vm, indent, vm->pgd);
+ virt_dump_region(stream, vm, indent, vm->mmu.pgd);
}
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
@@ -160,10 +158,10 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
vcpu->run->psw_addr = (uintptr_t)guest_code;
}
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
- uint64_t stack_vaddr;
+ u64 stack_gva;
struct kvm_regs regs;
struct kvm_sregs sregs;
struct kvm_vcpu *vcpu;
@@ -171,20 +169,19 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x",
vm->page_size);
- stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
+ stack_gva = __vm_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
vcpu = __vm_vcpu_add(vm, vcpu_id);
/* Setup guest registers */
vcpu_regs_get(vcpu, &regs);
- regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
+ regs.gprs[15] = stack_gva + (DEFAULT_STACK_PGS * getpagesize()) - 160;
vcpu_regs_set(vcpu, &regs);
vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
- sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
+ sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */
vcpu_sregs_set(vcpu, &sregs);
vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
@@ -206,13 +203,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
vcpu_regs_get(vcpu, &regs);
for (i = 0; i < num; i++)
- regs.gprs[i + 2] = va_arg(ap, uint64_t);
+ regs.gprs[i + 2] = va_arg(ap, u64);
vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent)
{
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
@@ -221,3 +218,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index cfed9d26cc71..4d845000de15 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -76,11 +76,11 @@
* the use of a binary-search tree, where each node contains at least
* the following members:
*
- * typedef uint64_t sparsebit_idx_t;
- * typedef uint64_t sparsebit_num_t;
+ * typedef u64 sparsebit_idx_t;
+ * typedef u64 sparsebit_num_t;
*
* sparsebit_idx_t idx;
- * uint32_t mask;
+ * u32 mask;
* sparsebit_num_t num_after;
*
* The idx member contains the bit index of the first bit described by this
@@ -116,7 +116,7 @@
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*
@@ -162,7 +162,7 @@
#define DUMP_LINE_MAX 100 /* Does not include indent amount */
-typedef uint32_t mask_t;
+typedef u32 mask_t;
#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
struct node {
@@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
*
* + A node with all mask bits set only occurs when the last bit
* described by the previous node is not equal to this nodes
- * starting index - 1. All such occurences of this condition are
+ * starting index - 1. All such occurrences of this condition are
* avoided by moving the setting of the nodes mask bits into
* the previous nodes num_after setting.
*/
@@ -2056,9 +2056,9 @@ unsigned char get8(void)
return ch;
}
-uint64_t get64(void)
+u64 get64(void)
{
- uint64_t x;
+ u64 x;
x = get8();
x = (x << 8) | get8();
@@ -2074,9 +2074,9 @@ int main(void)
{
s = sparsebit_alloc();
for (;;) {
- uint8_t op = get8() & 0xf;
- uint64_t first = get64();
- uint64_t last = get64();
+ u8 op = get8() & 0xf;
+ u64 first = get64();
+ u64 last = get64();
operate(op, first, last);
}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8ed0b74ae837..bab1bd2b775b 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -18,20 +18,27 @@
#include "test_util.h"
+sigjmp_buf expect_sigbus_jmpbuf;
+
+void __attribute__((used)) expect_sigbus_handler(int signum)
+{
+ siglongjmp(expect_sigbus_jmpbuf, 1);
+}
+
/*
* Random number generator that is usable from guest code. This is the
* Park-Miller LCG using standard constants.
*/
-struct guest_random_state new_guest_random_state(uint32_t seed)
+struct guest_random_state new_guest_random_state(u32 seed)
{
struct guest_random_state s = {.seed = seed};
return s;
}
-uint32_t guest_random_u32(struct guest_random_state *state)
+u32 guest_random_u32(struct guest_random_state *state)
{
- state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1);
+ state->seed = (u64)state->seed * 48271 % ((u32)(1 << 31) - 1);
return state->seed;
}
@@ -76,12 +83,12 @@ size_t parse_size(const char *size)
return base << shift;
}
-int64_t timespec_to_ns(struct timespec ts)
+s64 timespec_to_ns(struct timespec ts)
{
- return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec;
+ return (s64)ts.tv_nsec + 1000000000LL * (s64)ts.tv_sec;
}
-struct timespec timespec_add_ns(struct timespec ts, int64_t ns)
+struct timespec timespec_add_ns(struct timespec ts, s64 ns)
{
struct timespec res;
@@ -94,15 +101,15 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns)
struct timespec timespec_add(struct timespec ts1, struct timespec ts2)
{
- int64_t ns1 = timespec_to_ns(ts1);
- int64_t ns2 = timespec_to_ns(ts2);
+ s64 ns1 = timespec_to_ns(ts1);
+ s64 ns2 = timespec_to_ns(ts2);
return timespec_add_ns((struct timespec){0}, ns1 + ns2);
}
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
{
- int64_t ns1 = timespec_to_ns(ts1);
- int64_t ns2 = timespec_to_ns(ts2);
+ s64 ns1 = timespec_to_ns(ts1);
+ s64 ns2 = timespec_to_ns(ts2);
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
@@ -116,7 +123,7 @@ struct timespec timespec_elapsed(struct timespec start)
struct timespec timespec_div(struct timespec ts, int divisor)
{
- int64_t ns = timespec_to_ns(ts) / divisor;
+ s64 ns = timespec_to_ns(ts) / divisor;
return timespec_add_ns((struct timespec){0}, ns);
}
@@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-bool thp_configured(void)
+static bool test_sysfs_path(const char *path)
{
- int ret;
struct stat statbuf;
+ int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ ret = stat(path, &statbuf);
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "Error in stating /sys/kernel/mm/transparent_hugepage");
+ "Error in stat()ing '%s'", path);
return ret == 0;
}
-size_t get_trans_hugepagesz(void)
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
{
size_t size;
FILE *f;
int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
-
- f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
- TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
ret = fscanf(f, "%ld", &size);
- TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
- fclose(f);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f);
return size;
}
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
size_t get_def_hugetlb_pagesz(void)
{
char buf[64];
@@ -198,7 +225,7 @@ size_t get_def_hugetlb_pagesz(void)
#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
-const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i)
{
static const struct vm_mem_backing_src_alias aliases[] = {
[VM_MEM_SRC_ANONYMOUS] = {
@@ -290,9 +317,9 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
-size_t get_backing_src_pagesz(uint32_t i)
+size_t get_backing_src_pagesz(u32 i)
{
- uint32_t flag = vm_mem_backing_src_alias(i)->flag;
+ u32 flag = vm_mem_backing_src_alias(i)->flag;
switch (i) {
case VM_MEM_SRC_ANONYMOUS:
@@ -308,7 +335,7 @@ size_t get_backing_src_pagesz(uint32_t i)
}
}
-bool is_backing_src_hugetlb(uint32_t i)
+bool is_backing_src_hugetlb(u32 i)
{
return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index 42151e571953..029ce21f9f2f 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -14,7 +14,7 @@ struct ucall_header {
struct ucall ucalls[KVM_MAX_VCPUS];
};
-int ucall_nr_pages_required(uint64_t page_size)
+int ucall_nr_pages_required(u64 page_size)
{
return align_up(sizeof(struct ucall_header), page_size) / page_size;
}
@@ -25,16 +25,16 @@ int ucall_nr_pages_required(uint64_t page_size)
*/
static struct ucall_header *ucall_pool;
-void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa)
{
struct ucall_header *hdr;
struct ucall *uc;
- vm_vaddr_t vaddr;
+ gva_t gva;
int i;
- vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
- MEM_REGION_DATA);
- hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+ gva = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+ MEM_REGION_DATA);
+ hdr = (struct ucall_header *)addr_gva2hva(vm, gva);
memset(hdr, 0, sizeof(*hdr));
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
@@ -42,7 +42,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
uc->hva = uc;
}
- write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr);
+ write_guest_global(vm, ucall_pool, (struct ucall_header *)gva);
ucall_arch_init(vm, mmio_gpa);
}
@@ -79,7 +79,7 @@ static void ucall_free(struct ucall *uc)
clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
}
-void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+void ucall_assert(u64 cmd, const char *exp, const char *file,
unsigned int line, const char *fmt, ...)
{
struct ucall *uc;
@@ -88,20 +88,20 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file,
uc = ucall_alloc();
uc->cmd = cmd;
- WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
- WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+ WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (u64)(exp));
+ WRITE_ONCE(uc->args[GUEST_FILE], (u64)(file));
WRITE_ONCE(uc->args[GUEST_LINE], line);
va_start(va, fmt);
guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
va_end(va);
- ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+ ucall_arch_do_ucall((gva_t)uc->hva);
ucall_free(uc);
}
-void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+void ucall_fmt(u64 cmd, const char *fmt, ...)
{
struct ucall *uc;
va_list va;
@@ -113,12 +113,12 @@ void ucall_fmt(uint64_t cmd, const char *fmt, ...)
guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
va_end(va);
- ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+ ucall_arch_do_ucall((gva_t)uc->hva);
ucall_free(uc);
}
-void ucall(uint64_t cmd, int nargs, ...)
+void ucall(u64 cmd, int nargs, ...)
{
struct ucall *uc;
va_list va;
@@ -132,15 +132,15 @@ void ucall(uint64_t cmd, int nargs, ...)
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
- WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
+ WRITE_ONCE(uc->args[i], va_arg(va, u64));
va_end(va);
- ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+ ucall_arch_do_ucall((gva_t)uc->hva);
ucall_free(uc);
}
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
{
struct ucall ucall;
void *addr;
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..ef8d76f71f83 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -27,7 +27,7 @@ static void *uffd_handler_thread_fn(void *arg)
{
struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
int uffd = reader_args->uffd;
- int64_t pages = 0;
+ s64 pages = 0;
struct timespec start;
struct timespec ts_diff;
struct epoll_event evt;
@@ -100,8 +100,8 @@ static void *uffd_handler_thread_fn(void *arg)
}
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
- void *hva, uint64_t len,
- uint64_t num_readers,
+ void *hva, u64 len,
+ u64 num_readers,
uffd_handler_t handler)
{
struct uffd_desc *uffd_desc;
@@ -109,12 +109,12 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
int uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
- uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+ u64 expected_ioctls = ((u64)1) << _UFFDIO_COPY;
int ret, i;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
- is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
@@ -132,7 +132,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
/* In order to get minor faults, prefault via the alias. */
if (is_minor)
- expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+ expected_ioctls = ((u64)1) << _UFFDIO_CONTINUE;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
@@ -141,9 +141,9 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
uffdio_api.features = 0;
TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
"ioctl UFFDIO_API failed: %" PRIu64,
- (uint64_t)uffdio_api.api);
+ (u64)uffdio_api.api);
- uffdio_register.range.start = (uint64_t)hva;
+ uffdio_register.range.start = (u64)hva;
uffdio_register.range.len = len;
uffdio_register.mode = uffd_mode;
TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
index 89153a333e83..5182fd0d6a76 100644
--- a/tools/testing/selftests/kvm/lib/x86/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86/apic.c
@@ -14,7 +14,7 @@ void apic_disable(void)
void xapic_enable(void)
{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
+ u64 val = rdmsr(MSR_IA32_APICBASE);
/* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
if (val & MSR_IA32_APICBASE_EXTD) {
diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
index 15bc8cd583aa..d200c5c26e2e 100644
--- a/tools/testing/selftests/kvm/lib/x86/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c
@@ -76,23 +76,23 @@ bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature)
}
struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
- vm_vaddr_t *p_hv_pages_gva)
+ gva_t *p_hv_pages_gva)
{
- vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+ gva_t hv_pages_gva = vm_alloc_page(vm);
struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
/* Setup of a region of guest memory for the VP Assist page. */
- hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->vp_assist = (void *)vm_alloc_page(vm);
hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
/* Setup of a region of guest memory for the partition assist page. */
- hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->partition_assist = (void *)vm_alloc_page(vm);
hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
/* Setup of a region of guest memory for the enlightened VMCS. */
- hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ hv->enlightened_vmcs = (void *)vm_alloc_page(vm);
hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
@@ -100,9 +100,9 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
return hv;
}
-int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+int enable_vp_assist(u64 vp_assist_pa, void *vp_assist)
{
- uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index 7f5d62a65c68..61cf952cd2dc 100644
--- a/tools/testing/selftests/kvm/lib/x86/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -13,9 +13,10 @@
#include "kvm_util.h"
#include "memstress.h"
#include "processor.h"
+#include "svm_util.h"
#include "vmx.h"
-void memstress_l2_guest_code(uint64_t vcpu_id)
+void memstress_l2_guest_code(u64 vcpu_id)
{
memstress_guest_code(vcpu_id);
vmcall();
@@ -29,9 +30,10 @@ __asm__(
" ud2;"
);
-static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
-{
#define L2_GUEST_STACK_SIZE 64
+
+static void l1_vmx_code(struct vmx_pages *vmx, u64 vcpu_id)
+{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
unsigned long *rsp;
@@ -45,11 +47,35 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
GUEST_DONE();
}
-uint64_t memstress_nested_pages(int nr_vcpus)
+static void l1_svm_code(struct svm_test_data *svm, u64 vcpu_id)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ generic_svm_setup(svm, memstress_l2_guest_entry, rsp);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+
+static void memstress_l1_guest_code(void *data, u64 vcpu_id)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data, vcpu_id);
+ else
+ l1_svm_code(data, vcpu_id);
+}
+
+u64 memstress_nested_pages(int nr_vcpus)
{
/*
* 513 page tables is enough to identity-map 256 TiB of L2 with 1G
@@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus)
return 513 + 10 * nr_vcpus;
}
-void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+static void memstress_setup_ept_mappings(struct kvm_vm *vm)
{
- uint64_t start, end;
-
- prepare_eptp(vmx, vm, 0);
+ u64 start, end;
/*
* Identity map the first 4G and the test region with 1G pages so that
* KVM can shadow the EPT12 with the maximum huge page size supported
* by the backing source.
*/
- nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+ tdp_identity_map_1g(vm, 0, 0x100000000ULL);
start = align_down(memstress_args.gpa, PG_SIZE_1G);
end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
- nested_identity_map_1g(vmx, vm, start, end - start);
+ tdp_identity_map_1g(vm, start, end - start);
}
void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
{
- struct vmx_pages *vmx, *vmx0 = NULL;
struct kvm_regs regs;
- vm_vaddr_t vmx_gva;
+ gva_t nested_gva;
int vcpu_id;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- TEST_REQUIRE(kvm_cpu_has_ept());
+ TEST_REQUIRE(kvm_cpu_has_tdp());
+ vm_enable_tdp(vm);
+ memstress_setup_ept_mappings(vm);
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
- vmx = vcpu_alloc_vmx(vm, &vmx_gva);
-
- if (vcpu_id == 0) {
- memstress_setup_ept(vmx, vm);
- vmx0 = vmx;
- } else {
- /* Share the same EPT table across all vCPUs. */
- vmx->eptp = vmx0->eptp;
- vmx->eptp_hva = vmx0->eptp_hva;
- vmx->eptp_gpa = vmx0->eptp_gpa;
- }
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
/*
* Override the vCPU to run memstress_l1_guest_code() which will
@@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
vcpu_regs_get(vcpus[vcpu_id], &regs);
regs.rip = (unsigned long) memstress_l1_guest_code;
vcpu_regs_set(vcpus[vcpu_id], &regs);
- vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
index f31f0427c17c..0851b74b4e46 100644
--- a/tools/testing/selftests/kvm/lib/x86/pmu.c
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
@@ -8,9 +8,10 @@
#include <linux/kernel.h>
#include "kvm_util.h"
+#include "processor.h"
#include "pmu.h"
-const uint64_t intel_pmu_arch_events[] = {
+const u64 intel_pmu_arch_events[] = {
INTEL_ARCH_CPU_CYCLES,
INTEL_ARCH_INSTRUCTIONS_RETIRED,
INTEL_ARCH_REFERENCE_CYCLES,
@@ -19,13 +20,61 @@ const uint64_t intel_pmu_arch_events[] = {
INTEL_ARCH_BRANCHES_RETIRED,
INTEL_ARCH_BRANCHES_MISPREDICTED,
INTEL_ARCH_TOPDOWN_SLOTS,
+ INTEL_ARCH_TOPDOWN_BE_BOUND,
+ INTEL_ARCH_TOPDOWN_BAD_SPEC,
+ INTEL_ARCH_TOPDOWN_FE_BOUND,
+ INTEL_ARCH_TOPDOWN_RETIRING,
+ INTEL_ARCH_LBR_INSERTS,
};
kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
-const uint64_t amd_pmu_zen_events[] = {
+const u64 amd_pmu_zen_events[] = {
AMD_ZEN_CORE_CYCLES,
AMD_ZEN_INSTRUCTIONS_RETIRED,
AMD_ZEN_BRANCHES_RETIRED,
AMD_ZEN_BRANCHES_MISPREDICTED,
};
kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
+
+/*
+ * For Intel Atom CPUs, the PMU events "Instruction Retired" or
+ * "Branch Instruction Retired" may be overcounted for some certain
+ * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD
+ * and complex SGX/SMX/CSTATE instructions/flows.
+ *
+ * The detailed information can be found in the errata (section SRF7):
+ * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/
+ *
+ * For the Atom platforms before Sierra Forest (including Sierra Forest),
+ * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would
+ * be overcounted on these certain instructions, but for Clearwater Forest
+ * only "Instruction Retired" event is overcounted on these instructions.
+ */
+static u64 get_pmu_errata(void)
+{
+ if (!this_cpu_is_intel())
+ return 0;
+
+ if (this_cpu_family() != 0x6)
+ return 0;
+
+ switch (this_cpu_model()) {
+ case 0xDD: /* Clearwater Forest */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT);
+ case 0xAF: /* Sierra Forest */
+ case 0x4D: /* Avaton, Rangely */
+ case 0x5F: /* Denverton */
+ case 0x86: /* Jacobsville */
+ return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) |
+ BIT_ULL(BRANCHES_RETIRED_OVERCOUNT);
+ default:
+ return 0;
+ }
+}
+
+u64 pmu_errata_mask;
+
+void kvm_init_pmu_errata(void)
+{
+ pmu_errata_mask = get_pmu_errata();
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bd5a802fa7a5..b51467d70f6e 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -6,8 +6,12 @@
#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
+#include "pmu.h"
#include "processor.h"
+#include "smm.h"
+#include "svm_util.h"
#include "sev.h"
+#include "vmx.h"
#ifndef NUM_INTERRUPTS
#define NUM_INTERRUPTS 256
@@ -17,13 +21,48 @@
#define KERNEL_DS 0x10
#define KERNEL_TSS 0x18
-vm_vaddr_t exception_handlers;
+gva_t exception_handlers;
bool host_cpu_is_amd;
bool host_cpu_is_intel;
+bool host_cpu_is_hygon;
+bool host_cpu_is_amd_compatible;
bool is_forced_emulation_enabled;
-uint64_t guest_tsc_khz;
+u64 guest_tsc_khz;
+
+const char *ex_str(int vector)
+{
+ switch (vector) {
+#define VEC_STR(v) case v##_VECTOR: return "#" #v
+ case DE_VECTOR: return "no exception";
+ case KVM_MAGIC_DE_VECTOR: return "#DE";
+ VEC_STR(DB);
+ VEC_STR(NMI);
+ VEC_STR(BP);
+ VEC_STR(OF);
+ VEC_STR(BR);
+ VEC_STR(UD);
+ VEC_STR(NM);
+ VEC_STR(DF);
+ VEC_STR(TS);
+ VEC_STR(NP);
+ VEC_STR(SS);
+ VEC_STR(GP);
+ VEC_STR(PF);
+ VEC_STR(MF);
+ VEC_STR(AC);
+ VEC_STR(MC);
+ VEC_STR(XM);
+ VEC_STR(VE);
+ VEC_STR(CP);
+ VEC_STR(HV);
+ VEC_STR(VC);
+ VEC_STR(SX);
+ default: return "#??";
+#undef VEC_STR
+ }
+}
-static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
+static void regs_dump(FILE *stream, struct kvm_regs *regs, u8 indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
"rcx: 0x%.16llx rdx: 0x%.16llx\n",
@@ -47,7 +86,7 @@ static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
}
static void segment_dump(FILE *stream, struct kvm_segment *segment,
- uint8_t indent)
+ u8 indent)
{
fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
"selector: 0x%.4x type: 0x%.2x\n",
@@ -64,7 +103,7 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
}
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
- uint8_t indent)
+ u8 indent)
{
fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
"padding: 0x%.4x 0x%.4x 0x%.4x\n",
@@ -72,7 +111,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
-static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, u8 indent)
{
unsigned int i;
@@ -122,47 +161,83 @@ bool kvm_is_tdp_enabled(void)
return get_kvm_amd_param_bool("npt");
}
+static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ struct pte_masks *pte_masks)
+{
+ /* If needed, create the top-level page table. */
+ if (!mmu->pgd_created) {
+ mmu->pgd = vm_alloc_page_table(vm);
+ mmu->pgd_created = true;
+ mmu->arch.pte_masks = *pte_masks;
+ }
+
+ TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+ "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+ mmu->pgtable_levels);
+}
+
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+
+ struct pte_masks pte_masks = (struct pte_masks){
+ .present = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .user = BIT_ULL(2),
+ .accessed = BIT_ULL(5),
+ .dirty = BIT_ULL(6),
+ .huge = BIT_ULL(7),
+ .nx = BIT_ULL(63),
+ .executable = 0,
+ .c = vm->arch.c_bit,
+ .s = vm->arch.s_bit,
+ };
- /* If needed, create page map l4 table. */
- if (!vm->pgd_created) {
- vm->pgd = vm_alloc_page_table(vm);
- vm->pgd_created = true;
- }
+ virt_mmu_init(vm, &vm->mmu, &pte_masks);
}
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
- uint64_t vaddr, int level)
+void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
+ struct pte_masks *pte_masks)
{
- uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
- uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
- int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
- TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ vm->stage2_mmu.pgtable_levels = pgtable_levels;
+ virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
+}
+
+static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
+ u64 *parent_pte, gva_t gva, int level)
+{
+ u64 pt_gpa = PTE_GET_PA(*parent_pte);
+ u64 *page_table = addr_gpa2hva(vm, pt_gpa);
+ int index = (gva >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+ TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte),
"Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
- level + 1, vaddr);
+ level + 1, gva);
return &page_table[index];
}
-static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
- uint64_t *parent_pte,
- uint64_t vaddr,
- uint64_t paddr,
- int current_level,
- int target_level)
+static u64 *virt_create_upper_pte(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
+ u64 *parent_pte,
+ gva_t gva,
+ gpa_t gpa,
+ int current_level,
+ int target_level)
{
- uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+ u64 *pte = virt_get_pte(vm, mmu, parent_pte, gva, current_level);
- paddr = vm_untag_gpa(vm, paddr);
+ gpa = vm_untag_gpa(vm, gpa);
- if (!(*pte & PTE_PRESENT_MASK)) {
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (!is_present_pte(mmu, pte)) {
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu);
if (current_level == target_level)
- *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ *pte |= PTE_HUGE_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
@@ -172,81 +247,84 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
* this level.
*/
TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, vaddr: 0x%lx",
- current_level, vaddr);
- TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
- "Cannot create page table at level: %u, vaddr: 0x%lx",
- current_level, vaddr);
+ "Cannot create hugepage at level: %u, gva: 0x%lx",
+ current_level, gva);
+ TEST_ASSERT(!is_huge_pte(mmu, pte),
+ "Cannot create page table at level: %u, gva: 0x%lx",
+ current_level, gva);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva,
+ gpa_t gpa, int level)
{
- const uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t *pml4e, *pdpe, *pde;
- uint64_t *pte;
+ const u64 pg_size = PG_LEVEL_SIZE(level);
+ u64 *pte = &mmu->pgd;
+ int current_level;
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
- "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
- TEST_ASSERT((vaddr % pg_size) == 0,
+ TEST_ASSERT((gva % pg_size) == 0,
"Virtual address not aligned,\n"
- "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx", vaddr);
- TEST_ASSERT((paddr % pg_size) == 0,
+ "gva: 0x%lx page size: 0x%lx", gva, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
+ TEST_ASSERT((gpa % pg_size) == 0,
"Physical address not aligned,\n"
- " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ " gpa: 0x%lx page size: 0x%lx", gpa, pg_size);
+ TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
- "Unexpected bits in paddr: %lx", paddr);
+ " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ gpa, vm->max_gfn, vm->page_size);
+ TEST_ASSERT(vm_untag_gpa(vm, gpa) == gpa,
+ "Unexpected bits in gpa: %lx", gpa);
+
+ TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+ "X and NX bit masks cannot be used simultaneously");
/*
* Allocate upper level page tables, if not already present. Return
* early if a hugepage was created.
*/
- pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
- if (*pml4e & PTE_LARGE_MASK)
- return;
-
- pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
- if (*pdpe & PTE_LARGE_MASK)
- return;
-
- pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
- if (*pde & PTE_LARGE_MASK)
- return;
+ for (current_level = mmu->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_create_upper_pte(vm, mmu, pte, gva, gpa,
+ current_level, level);
+ if (is_huge_pte(mmu, pte))
+ return;
+ }
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
- TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
- "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
- *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ pte = virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K);
+ TEST_ASSERT(!is_present_pte(mmu, pte),
+ "PTE already present for 4k page at gva: 0x%lx", gva);
+ *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+ PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+ PTE_ALWAYS_SET_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK);
/*
* Neither SEV nor TDX supports shared page tables, so only the final
* leaf PTE needs manually set the C/S-bit.
*/
- if (vm_is_gpa_protected(vm, paddr))
- *pte |= vm->arch.c_bit;
+ if (vm_is_gpa_protected(vm, gpa))
+ *pte |= PTE_C_BIT_MASK(mmu);
else
- *pte |= vm->arch.s_bit;
+ *pte |= PTE_S_BIT_MASK(mmu);
}
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa)
{
- __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+ __virt_pg_map(vm, &vm->mmu, gva, gpa, PG_LEVEL_4K);
}
-void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint64_t nr_bytes, int level)
+void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa,
+ u64 nr_bytes, int level)
{
- uint64_t pg_size = PG_LEVEL_SIZE(level);
- uint64_t nr_pages = nr_bytes / pg_size;
+ u64 pg_size = PG_LEVEL_SIZE(level);
+ u64 nr_pages = nr_bytes / pg_size;
int i;
TEST_ASSERT(nr_bytes % pg_size == 0,
@@ -254,16 +332,19 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
nr_bytes, pg_size);
for (i = 0; i < nr_pages; i++) {
- __virt_pg_map(vm, vaddr, paddr, level);
+ __virt_pg_map(vm, &vm->mmu, gva, gpa, level);
+ sparsebit_set_num(vm->vpages_mapped, gva >> vm->page_shift,
+ nr_bytes / PAGE_SIZE);
- vaddr += pg_size;
- paddr += pg_size;
+ gva += pg_size;
+ gpa += pg_size;
}
}
-static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+static bool vm_is_target_pte(struct kvm_mmu *mmu, u64 *pte,
+ int *level, int current_level)
{
- if (*pte & PTE_LARGE_MASK) {
+ if (is_huge_pte(mmu, pte)) {
TEST_ASSERT(*level == PG_LEVEL_NONE ||
*level == current_level,
"Unexpected hugepage at level %d", current_level);
@@ -273,60 +354,65 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
return *level == current_level;
}
-uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
- int *level)
+static u64 *__vm_get_page_table_entry(struct kvm_vm *vm,
+ struct kvm_mmu *mmu,
+ gva_t gva,
+ int *level)
{
- uint64_t *pml4e, *pdpe, *pde;
+ int va_width = 12 + (mmu->pgtable_levels) * 9;
+ u64 *pte = &mmu->pgd;
+ int current_level;
TEST_ASSERT(!vm->arch.is_pt_protected,
"Walking page tables of protected guests is impossible");
- TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels,
"Invalid PG_LEVEL_* '%d'", *level);
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
- (vaddr >> vm->page_shift)),
- "Invalid virtual address, vaddr: 0x%lx",
- vaddr);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)),
+ "Invalid virtual address, gva: 0x%lx", gva);
/*
- * Based on the mode check above there are 48 bits in the vaddr, so
- * shift 16 to sign extend the last bit (bit-47),
+ * Check that the gva is a sign-extended va_width value.
*/
- TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
- "Canonical check failed. The virtual address is invalid.");
-
- pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
- if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
- return pml4e;
+ TEST_ASSERT(gva == (((s64)gva << (64 - va_width) >> (64 - va_width))),
+ "Canonical check failed. The virtual address is invalid.");
+
+ for (current_level = mmu->pgtable_levels;
+ current_level > PG_LEVEL_4K;
+ current_level--) {
+ pte = virt_get_pte(vm, mmu, pte, gva, current_level);
+ if (vm_is_target_pte(mmu, pte, level, current_level))
+ return pte;
+ }
- pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
- if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
- return pdpe;
+ return virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K);
+}
- pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
- if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
- return pde;
+u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa)
+{
+ int level = PG_LEVEL_4K;
- return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level);
}
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva)
{
int level = PG_LEVEL_4K;
- return __vm_get_page_table_entry(vm, vaddr, &level);
+ return __vm_get_page_table_entry(vm, &vm->mmu, gva, &level);
}
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent)
{
- uint64_t *pml4e, *pml4e_start;
- uint64_t *pdpe, *pdpe_start;
- uint64_t *pde, *pde_start;
- uint64_t *pte, *pte_start;
+ struct kvm_mmu *mmu = &vm->mmu;
+ u64 *pml4e, *pml4e_start;
+ u64 *pdpe, *pdpe_start;
+ u64 *pde, *pde_start;
+ u64 *pte, *pte_start;
- if (!vm->pgd_created)
+ if (!mmu->pgd_created)
return;
fprintf(stream, "%*s "
@@ -334,47 +420,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
- for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e_start = (u64 *)addr_gpa2hva(vm, mmu->pgd);
+ for (u16 n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!(*pml4e & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pml4e))
continue;
fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
- !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
+ is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e));
pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
- for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ for (u16 n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!(*pdpe & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pdpe))
continue;
fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
- !!(*pdpe & PTE_NX_MASK));
+ PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe),
+ is_nx_pte(mmu, pdpe));
pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
- for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ for (u16 n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!(*pde & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pde))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
- !!(*pde & PTE_NX_MASK));
+ PTE_GET_PFN(*pde), is_writable_pte(mmu, pde),
+ is_nx_pte(mmu, pde));
pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
- for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ for (u16 n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!(*pte & PTE_PRESENT_MASK))
+ if (!is_present_pte(mmu, pte))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
"0x%-12lx 0x%-10llx %u %u "
@@ -383,19 +469,83 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
PTE_GET_PFN(*pte),
- !!(*pte & PTE_WRITABLE_MASK),
- !!(*pte & PTE_NX_MASK),
- !!(*pte & PTE_DIRTY_MASK),
- ((uint64_t) n1 << 27)
- | ((uint64_t) n2 << 18)
- | ((uint64_t) n3 << 9)
- | ((uint64_t) n4));
+ is_writable_pte(mmu, pte),
+ is_nx_pte(mmu, pte),
+ is_dirty_pte(mmu, pte),
+ ((u64)n1 << 27)
+ | ((u64)n2 << 18)
+ | ((u64)n3 << 9)
+ | ((u64)n4));
}
}
}
}
}
+void vm_enable_tdp(struct kvm_vm *vm)
+{
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vm_enable_ept(vm);
+ else
+ vm_enable_npt(vm);
+}
+
+bool kvm_cpu_has_tdp(void)
+{
+ return kvm_cpu_has_ept() || kvm_cpu_has_npt();
+}
+
+void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size, int level)
+{
+ size_t page_size = PG_LEVEL_SIZE(level);
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(l2_gpa + size > l2_gpa, "L2 GPA overflow");
+ TEST_ASSERT(gpa + size > gpa, "GPA overflow");
+
+ while (npages--) {
+ __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, gpa, level);
+ l2_gpa += page_size;
+ gpa += page_size;
+ }
+}
+
+void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size)
+{
+ __tdp_map(vm, l2_gpa, gpa, size, PG_LEVEL_4K);
+}
+
+/* Prepare an identity extended page table that maps all the
+ * physical pages in VM.
+ */
+void tdp_identity_map_default_memslots(struct kvm_vm *vm)
+{
+ u32 s, memslot = 0;
+ sparsebit_idx_t i, last;
+ struct userspace_mem_region *region = memslot2region(vm, memslot);
+
+ /* Only memslot 0 is mapped here, ensure it's the only one being used */
+ for (s = 0; s < NR_MEM_REGIONS; s++)
+ TEST_ASSERT_EQ(vm->memslots[s], 0);
+
+ i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
+ last = i + (region->region.memory_size >> vm->page_shift);
+ for (;;) {
+ i = sparsebit_next_clear(region->unused_phy_pages, i);
+ if (i > last)
+ break;
+
+ tdp_map(vm, (u64)i << vm->page_shift,
+ (u64)i << vm->page_shift, 1 << vm->page_shift);
+ }
+}
+
+/* Identity map a region with 1GiB Pages. */
+void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size)
+{
+ __tdp_map(vm, addr, addr, size, PG_LEVEL_1G);
+}
+
/*
* Set Unusable Segment
*
@@ -463,12 +613,12 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
segp->present = true;
}
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva)
{
int level = PG_LEVEL_NONE;
- uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
+ u64 *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level);
- TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ TEST_ASSERT(is_present_pte(&vm->mmu, pte),
"Leaf PTE not PRESENT for gva: 0x%08lx", gva);
/*
@@ -478,7 +628,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
}
-static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
+static void kvm_seg_set_tss_64bit(gva_t base, struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
segp->base = base;
@@ -492,7 +642,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
- TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+ TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
+ "Unknown or unsupported guest mode: 0x%x", vm->mode);
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
@@ -506,6 +657,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
if (kvm_cpu_has(X86_FEATURE_XSAVE))
sregs.cr4 |= X86_CR4_OSXSAVE;
+ if (vm->mmu.pgtable_levels == 5)
+ sregs.cr4 |= X86_CR4_LA57;
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
@@ -515,7 +668,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
kvm_seg_set_kernel_data_64bit(&sregs.gs);
kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
- sregs.cr3 = vm->pgd;
+ sregs.cr3 = vm->mmu.pgd;
vcpu_sregs_set(vcpu, &sregs);
}
@@ -557,7 +710,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
return false;
if (regs->vector == DE_VECTOR)
- return false;
+ regs->vector = KVM_MAGIC_DE_VECTOR;
regs->rip = regs->r11;
regs->r9 = regs->vector;
@@ -588,16 +741,16 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm)
struct kvm_segment seg;
int i;
- vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.gdt = __vm_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.idt = __vm_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.tss = __vm_alloc_page(vm, MEM_REGION_DATA);
/* Handlers have the same address in both address spaces.*/
for (i = 0; i < NUM_INTERRUPTS; i++)
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+ *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers;
kvm_seg_set_kernel_code_64bit(&seg);
kvm_seg_fill_gdt_64bit(vm, &seg);
@@ -612,9 +765,9 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm)
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *))
{
- vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+ gva_t *handlers = (gva_t *)addr_gva2hva(vm, vm->handlers);
- handlers[vector] = (vm_vaddr_t)handler;
+ handlers[vector] = (gva_t)handler;
}
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
@@ -625,7 +778,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
}
-void kvm_arch_vm_post_create(struct kvm_vm *vm)
+void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
int r;
@@ -637,9 +790,12 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
sync_global_to_guest(vm, host_cpu_is_intel);
sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, host_cpu_is_hygon);
+ sync_global_to_guest(vm, host_cpu_is_amd_compatible);
sync_global_to_guest(vm, is_forced_emulation_enabled);
+ sync_global_to_guest(vm, pmu_errata_mask);
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
struct kvm_sev_init init = { 0 };
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
@@ -660,18 +816,17 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
vcpu_regs_set(vcpu, &regs);
}
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id)
{
struct kvm_mp_state mp_state;
struct kvm_regs regs;
- vm_vaddr_t stack_vaddr;
+ gva_t stack_gva;
struct kvm_vcpu *vcpu;
- stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN,
- MEM_REGION_DATA);
+ stack_gva = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
- stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+ stack_gva += DEFAULT_STACK_PGS * getpagesize();
/*
* Align stack to match calling sequence requirements in section "The
@@ -682,9 +837,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
* If this code is ever used to launch a vCPU with 32-bit entry point it
* may need to subtract 4 bytes instead of 8 bytes.
*/
- TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
- "__vm_vaddr_alloc() did not provide a page-aligned address");
- stack_vaddr -= 8;
+ TEST_ASSERT(IS_ALIGNED(stack_gva, PAGE_SIZE),
+ "__vm_alloc() did not provide a page-aligned address");
+ stack_gva -= 8;
vcpu = __vm_vcpu_add(vm, vcpu_id);
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
@@ -694,7 +849,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
/* Setup guest general purpose registers */
vcpu_regs_get(vcpu, &regs);
regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr;
+ regs.rsp = stack_gva;
vcpu_regs_set(vcpu, &regs);
/* Setup the MP state */
@@ -711,7 +866,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
return vcpu;
}
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id)
{
struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
@@ -746,9 +901,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
return kvm_supported_cpuid;
}
-static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index,
- uint8_t reg, uint8_t lo, uint8_t hi)
+static u32 __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+ u32 function, u32 index,
+ u8 reg, u8 lo, u8 hi)
{
const struct kvm_cpuid_entry2 *entry;
int i;
@@ -775,14 +930,14 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
feature.reg, feature.bit, feature.bit);
}
-uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
- struct kvm_x86_cpu_property property)
+u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property)
{
return __kvm_cpu_has(cpuid, property.function, property.index,
property.reg, property.lo_bit, property.hi_bit);
}
-uint64_t kvm_get_feature_msr(uint64_t msr_index)
+u64 kvm_get_feature_msr(u64 msr_index)
{
struct {
struct kvm_msrs header;
@@ -801,7 +956,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
return buffer.entry.data;
}
-void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
+void __vm_xsave_require_permission(u64 xfeature, const char *name)
{
int kvm_fd;
u64 bitmask;
@@ -858,7 +1013,7 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
struct kvm_x86_cpu_property property,
- uint32_t value)
+ u32 value)
{
struct kvm_cpuid_entry2 *entry;
@@ -873,7 +1028,7 @@ void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
}
-void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function)
{
struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
@@ -902,7 +1057,7 @@ void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
vcpu_set_cpuid(vcpu);
}
-uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
+u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index)
{
struct {
struct kvm_msrs header;
@@ -917,7 +1072,7 @@ uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
return buffer.entry.data;
}
-int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value)
{
struct {
struct kvm_msrs header;
@@ -945,28 +1100,28 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
vcpu_regs_get(vcpu, &regs);
if (num >= 1)
- regs.rdi = va_arg(ap, uint64_t);
+ regs.rdi = va_arg(ap, u64);
if (num >= 2)
- regs.rsi = va_arg(ap, uint64_t);
+ regs.rsi = va_arg(ap, u64);
if (num >= 3)
- regs.rdx = va_arg(ap, uint64_t);
+ regs.rdx = va_arg(ap, u64);
if (num >= 4)
- regs.rcx = va_arg(ap, uint64_t);
+ regs.rcx = va_arg(ap, u64);
if (num >= 5)
- regs.r8 = va_arg(ap, uint64_t);
+ regs.r8 = va_arg(ap, u64);
if (num >= 6)
- regs.r9 = va_arg(ap, uint64_t);
+ regs.r9 = va_arg(ap, u64);
vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent)
{
struct kvm_regs regs;
struct kvm_sregs sregs;
@@ -1035,7 +1190,7 @@ const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
return list;
}
-bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+bool kvm_msr_is_in_save_restore_list(u32 msr_index)
{
const struct kvm_msr_list *list = kvm_get_msr_index_list();
int i;
@@ -1156,7 +1311,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
@@ -1166,7 +1321,7 @@ void kvm_init_vm_address_properties(struct kvm_vm *vm)
}
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
- uint32_t function, uint32_t index)
+ u32 function, u32 index)
{
int i;
@@ -1183,7 +1338,7 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
#define X86_HYPERCALL(inputs...) \
({ \
- uint64_t r; \
+ u64 r; \
\
asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
"jnz 1f\n\t" \
@@ -1192,23 +1347,23 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
"1: vmmcall\n\t" \
"2:" \
: "=a"(r) \
- : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ : [use_vmmcall] "r" (host_cpu_is_amd_compatible), \
+ inputs); \
\
r; \
})
-uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
- uint64_t a3)
+u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3)
{
return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
}
-uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+u64 __xen_hypercall(u64 nr, u64 a0, void *a1)
{
return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
}
-void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+void xen_hypercall(u64 nr, u64 a0, void *a1)
{
GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
}
@@ -1217,7 +1372,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
unsigned long ht_gfn, max_gfn, max_pfn;
- uint8_t maxphyaddr, guest_maxphyaddr;
+ u8 maxphyaddr, guest_maxphyaddr;
/*
* Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
@@ -1232,8 +1387,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
- /* Avoid reserved HyperTransport region on AMD processors. */
- if (!host_cpu_is_amd)
+ /* Avoid reserved HyperTransport region on AMD or Hygon processors. */
+ if (!host_cpu_is_amd_compatible)
return max_gfn;
/* On parts with <40 physical address bits, the area is fully hidden */
@@ -1247,7 +1402,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
/*
* Otherwise it's at the top of the physical address space, possibly
- * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * reduced due to SME or CSV by bits 11:6 of CPUID[0x8000001f].EBX. Use
* the old conservative value if MAXPHYADDR is not enumerated.
*/
if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
@@ -1264,21 +1419,15 @@ done:
return min(max_gfn, ht_gfn - 1);
}
-/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- /* Ensure that a KVM vendor-specific module is loaded. */
- if (vm == NULL)
- close(open_kvm_dev_path_or_exit());
-
- return get_kvm_intel_param_bool("unrestricted_guest");
-}
-
void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
+ host_cpu_is_hygon = this_cpu_is_hygon();
+ host_cpu_is_amd_compatible = host_cpu_is_amd || host_cpu_is_hygon;
is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ kvm_init_pmu_errata();
}
bool sys_clocksource_is_based_on_tsc(void)
@@ -1291,3 +1440,32 @@ bool sys_clocksource_is_based_on_tsc(void)
return ret;
}
+
+bool kvm_arch_has_default_irqchip(void)
+{
+ return true;
+}
+
+void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa,
+ const void *smi_handler, size_t handler_size)
+{
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa,
+ SMRAM_MEMSLOT, SMRAM_PAGES, 0);
+ TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa,
+ SMRAM_MEMSLOT) == smram_gpa,
+ "Could not allocate guest physical addresses for SMRAM");
+
+ memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE);
+ memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa);
+}
+
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+ vcpu_events_set(vcpu, &events);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..93f916903461 100644
--- a/tools/testing/selftests/kvm/lib/x86/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -14,35 +14,46 @@
* and find the first range, but that's correct because the condition
* expression would cause us to quit the loop.
*/
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ u8 page_type, bool private)
{
const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
- const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const gpa_t gpa_base = region->region.guest_phys_addr;
const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
sparsebit_idx_t i, j;
if (!sparsebit_any_set(protected_phy_pages))
return;
- sev_register_encrypted_memory(vm, region);
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
sparsebit_for_each_set_range(protected_phy_pages, i, j) {
- const uint64_t size = (j - i + 1) * vm->page_size;
- const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+ const u64 size = (j - i + 1) * vm->page_size;
+ const u64 offset = (i - lowest_page_in_region) * vm->page_size;
+
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (u64)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
- sev_launch_update_data(vm, gpa_base + offset, size);
}
}
void sev_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
@@ -50,17 +61,25 @@ void sev_vm_init(struct kvm_vm *vm)
void sev_es_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
-void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
+void sev_vm_launch(struct kvm_vm *vm, u32 policy)
{
struct kvm_sev_launch_start launch_start = {
.policy = policy,
@@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
if (policy & SEV_POLICY_ES)
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -84,7 +103,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
vm->arch.is_pt_protected = true;
}
-void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement)
{
struct kvm_sev_launch_measure launch_measure;
struct kvm_sev_guest_status guest_status;
@@ -112,7 +131,34 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
+void snp_vm_launch_start(struct kvm_vm *vm, u64 policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code,
struct kvm_vcpu **cpu)
{
struct vm_shape shape = {
@@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
return vm;
}
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement)
{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
sev_vm_launch(vm, policy);
if (!measurement)
diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index d239c2097391..3b01605ab016 100644
--- a/tools/testing/selftests/kvm/lib/x86/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -28,24 +28,27 @@ u64 rflags;
* Pointer to structure with the addresses of the SVM areas.
*/
struct svm_test_data *
-vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
+vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
+ gva_t svm_gva = vm_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
+ svm->vmcb = (void *)vm_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc_page(vm);
+ svm->save_area = (void *)vm_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
- svm->msr = (void *)vm_vaddr_alloc_page(vm);
+ svm->msr = (void *)vm_alloc_page(vm);
svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
memset(svm->msr_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ svm->ncr3_gpa = vm->stage2_mmu.pgd;
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -59,17 +62,36 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}
+void vm_enable_npt(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT");
+
+ /*
+ * NPTs use the same PTE format, but deliberately drop the C-bit as the
+ * per-VM shared vs. private information is only meant for stage-1.
+ */
+ pte_masks = vm->mmu.arch.pte_masks;
+ pte_masks.c = 0;
+
+ /* NPT walks are treated as user accesses, so set the 'user' bit. */
+ pte_masks.always_set = pte_masks.user;
+
+ tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks);
+}
+
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
{
struct vmcb *vmcb = svm->vmcb;
- uint64_t vmcb_gpa = svm->vmcb_gpa;
+ u64 vmcb_gpa = svm->vmcb_gpa;
struct vmcb_save_area *save = &vmcb->save;
struct vmcb_control_area *ctrl = &vmcb->control;
u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
| SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK;
u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK
| SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK;
- uint64_t efer;
+ u64 efer;
efer = rdmsr(MSR_EFER);
wrmsr(MSR_EFER, efer | EFER_SVME);
@@ -102,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
guest_regs.rdi = (u64)svm;
+
+ if (svm->ncr3_gpa) {
+ ctrl->misc_ctl |= SVM_MISC_ENABLE_NP;
+ ctrl->nested_cr3 = svm->ncr3_gpa;
+ }
}
/*
@@ -131,7 +158,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
* for now. registers involved in LOAD/SAVE_GPR_C are eventually
* unmodified so they do not need to be in the clobber list.
*/
-void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
+void run_guest(struct vmcb *vmcb, u64 vmcb_gpa)
{
asm volatile (
"vmload %[vmcb_gpa]\n\t"
diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
index 1265cecc7dd1..e7dd5791959b 100644
--- a/tools/testing/selftests/kvm/lib/x86/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86/ucall.c
@@ -6,9 +6,9 @@
*/
#include "kvm_util.h"
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
+#define UCALL_PIO_PORT ((u16)0x1000)
-void ucall_arch_do_ucall(vm_vaddr_t uc)
+void ucall_arch_do_ucall(gva_t uc)
{
/*
* FIXME: Revert this hack (the entire commit that added it) once nVMX
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d4d1208dd023..67642759e4a0 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -10,41 +10,24 @@
#include "processor.h"
#include "vmx.h"
-#define PAGE_SHIFT_4K 12
-
#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000
+#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */
+#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */
+#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */
+
+#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT)
+#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */
+#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT)
+
bool enable_evmcs;
struct hv_enlightened_vmcs *current_evmcs;
struct hv_vp_assist_page *current_vp_assist;
-struct eptPageTableEntry {
- uint64_t readable:1;
- uint64_t writable:1;
- uint64_t executable:1;
- uint64_t memory_type:3;
- uint64_t ignore_pat:1;
- uint64_t page_size:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t ignored_11_10:2;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t suppress_ve:1;
-};
-
-struct eptPageTablePointer {
- uint64_t memory_type:3;
- uint64_t page_walk_length:3;
- uint64_t ad_enabled:1;
- uint64_t reserved_11_07:5;
- uint64_t address:40;
- uint64_t reserved_63_52:12;
-};
int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
- uint16_t evmcs_ver;
+ u16 evmcs_ver;
vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
(unsigned long)&evmcs_ver);
@@ -58,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
return evmcs_ver;
}
+void vm_enable_ept(struct kvm_vm *vm)
+{
+ struct pte_masks pte_masks;
+
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ /*
+ * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
+ * 'readable' bit.
+ */
+ pte_masks = (struct pte_masks) {
+ .present = 0,
+ .user = 0,
+ .readable = BIT_ULL(0),
+ .writable = BIT_ULL(1),
+ .executable = BIT_ULL(2),
+ .huge = BIT_ULL(7),
+ .accessed = BIT_ULL(8),
+ .dirty = BIT_ULL(9),
+ .nx = 0,
+ };
+
+ /* TODO: Add support for 5-level EPT. */
+ tdp_mmu_init(vm, 4, &pte_masks);
+}
+
/* Allocate memory regions for nested VMX tests.
*
* Input Args:
@@ -70,51 +79,54 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
* Pointer to structure with the addresses of the VMX areas.
*/
struct vmx_pages *
-vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
+ gva_t vmx_gva = vm_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmxon = (void *)vm_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmcs = (void *)vm_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc_page(vm);
+ vmx->msr = (void *)vm_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ vmx->shadow_vmcs = (void *)vm_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmread = (void *)vm_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
+ vmx->vmwrite = (void *)vm_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
+ if (vm->stage2_mmu.pgd_created)
+ vmx->eptp_gpa = vm->stage2_mmu.pgd;
+
*p_vmx_gva = vmx_gva;
return vmx;
}
bool prepare_for_vmx_operation(struct vmx_pages *vmx)
{
- uint64_t feature_control;
- uint64_t required;
+ u64 feature_control;
+ u64 required;
unsigned long cr0;
unsigned long cr4;
@@ -148,7 +160,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
wrmsr(MSR_IA32_FEAT_CTL, feature_control | required);
/* Enter VMX root operation. */
- *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+ *(u32 *)(vmx->vmxon) = vmcs_revision();
if (vmxon(vmx->vmxon_gpa))
return false;
@@ -158,7 +170,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
bool load_vmcs(struct vmx_pages *vmx)
{
/* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ *(u32 *)(vmx->vmcs) = vmcs_revision();
if (vmclear(vmx->vmcs_gpa))
return false;
@@ -166,14 +178,14 @@ bool load_vmcs(struct vmx_pages *vmx)
return false;
/* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ *(u32 *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
if (vmclear(vmx->shadow_vmcs_gpa))
return false;
return true;
}
-static bool ept_vpid_cap_supported(uint64_t mask)
+static bool ept_vpid_cap_supported(u64 mask)
{
return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
}
@@ -188,7 +200,7 @@ bool ept_1g_pages_supported(void)
*/
static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
{
- uint32_t sec_exec_ctl = 0;
+ u32 sec_exec_ctl = 0;
vmwrite(VIRTUAL_PROCESSOR_ID, 0);
vmwrite(POSTED_INTR_NV, 0);
@@ -196,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
if (vmx->eptp_gpa) {
- uint64_t ept_paddr;
- struct eptPageTablePointer eptp = {
- .memory_type = X86_MEMTYPE_WB,
- .page_walk_length = 3, /* + 1 */
- .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
- .address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
- };
-
- memcpy(&ept_paddr, &eptp, sizeof(ept_paddr));
- vmwrite(EPT_POINTER, ept_paddr);
+ u64 eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4;
+
+ TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0,
+ "Illegal bits set in vmx->eptp_gpa");
+
+ if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS))
+ eptp |= EPTP_AD_ENABLED;
+
+ vmwrite(EPT_POINTER, eptp);
sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT;
}
@@ -248,7 +259,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
*/
static inline void init_vmcs_host_state(void)
{
- uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+ u32 exit_controls = vmreadz(VM_EXIT_CONTROLS);
vmwrite(HOST_ES_SELECTOR, get_es());
vmwrite(HOST_CS_SELECTOR, get_cs());
@@ -347,8 +358,8 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp)
vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
vmwrite(GUEST_DR7, 0x400);
- vmwrite(GUEST_RSP, (uint64_t)rsp);
- vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RSP, (u64)rsp);
+ vmwrite(GUEST_RIP, (u64)rip);
vmwrite(GUEST_RFLAGS, 2);
vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
@@ -362,169 +373,12 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-static void nested_create_pte(struct kvm_vm *vm,
- struct eptPageTableEntry *pte,
- uint64_t nested_paddr,
- uint64_t paddr,
- int current_level,
- int target_level)
-{
- if (!pte->readable) {
- pte->writable = true;
- pte->readable = true;
- pte->executable = true;
- pte->page_size = (current_level == target_level);
- if (pte->page_size)
- pte->address = paddr >> vm->page_shift;
- else
- pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
- } else {
- /*
- * Entry already present. Assert that the caller doesn't want
- * a hugepage at this level, and that there isn't a hugepage at
- * this level.
- */
- TEST_ASSERT(current_level != target_level,
- "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- TEST_ASSERT(!pte->page_size,
- "Cannot create page table at level: %u, nested_paddr: 0x%lx",
- current_level, nested_paddr);
- }
-}
-
-
-void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, int target_level)
-{
- const uint64_t page_size = PG_LEVEL_SIZE(target_level);
- struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
- uint16_t index;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((nested_paddr >> 48) == 0,
- "Nested physical address 0x%lx requires 5-level paging",
- nested_paddr);
- TEST_ASSERT((nested_paddr % page_size) == 0,
- "Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx page_size: 0x%lx",
- nested_paddr, page_size);
- TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx page_size: 0x%lx",
- paddr, page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
- index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
- pte = &pt[index];
-
- nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
- if (pte->page_size)
- break;
-
- pt = addr_gpa2hva(vm, pte->address * vm->page_size);
- }
-
- /*
- * For now mark these as accessed and dirty because the only
- * testcase we have needs that. Can be reconsidered later.
- */
- pte->accessed = true;
- pte->dirty = true;
-
-}
-
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
-{
- __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
-}
-
-/*
- * Map a range of EPT guest physical addresses to the VM's physical address
- *
- * Input Args:
- * vm - Virtual Machine
- * nested_paddr - Nested guest physical address to map
- * paddr - VM Physical Address
- * size - The size of the range to map
- * level - The level at which to map the range
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a nested guest translation for the
- * page range starting at nested_paddr to the page range starting at paddr.
- */
-void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- int level)
-{
- size_t page_size = PG_LEVEL_SIZE(level);
- size_t npages = size / page_size;
-
- TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
- TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
-
- while (npages--) {
- __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
- nested_paddr += page_size;
- paddr += page_size;
- }
-}
-
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
-{
- __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
-}
-
-/* Prepare an identity extended page table that maps all the
- * physical pages in VM.
- */
-void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot)
-{
- sparsebit_idx_t i, last;
- struct userspace_mem_region *region =
- memslot2region(vm, memslot);
-
- i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
- last = i + (region->region.memory_size >> vm->page_shift);
- for (;;) {
- i = sparsebit_next_clear(region->unused_phy_pages, i);
- if (i > last)
- break;
-
- nested_map(vmx, vm,
- (uint64_t)i << vm->page_shift,
- (uint64_t)i << vm->page_shift,
- 1 << vm->page_shift);
- }
-}
-
-/* Identity map a region with 1GiB Pages. */
-void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t addr, uint64_t size)
-{
- __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
-}
-
bool kvm_cpu_has_ept(void)
{
- uint64_t ctrl;
+ u64 ctrl;
+
+ if (!kvm_cpu_has(X86_FEATURE_VMX))
+ return false;
ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
@@ -534,19 +388,9 @@ bool kvm_cpu_has_ept(void)
return ctrl & SECONDARY_EXEC_ENABLE_EPT;
}
-void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
-{
- TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-
- vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
- vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
- vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
-}
-
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+ vmx->apic_access = (void *)vm_alloc_page(vm);
vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c
new file mode 100644
index 000000000000..a7279ded8518
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates periodic/one-shot constant timer IRQ using
+ * CSR.TCFG and CSR.TVAL registers.
+ */
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "ucall_common.h"
+
+static void do_idle(void)
+{
+ unsigned int intid;
+ unsigned long estat;
+
+ __asm__ __volatile__("idle 0" : : : "memory");
+
+ estat = csr_read(LOONGARCH_CSR_ESTAT);
+ intid = !!(estat & BIT(INT_TI));
+
+ /* Make sure pending timer IRQ arrived */
+ GUEST_ASSERT_EQ(intid, 1);
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+ u32 cpu = guest_get_vcpuid();
+ u64 xcnt, val, cfg, xcnt_diff_us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ intid = !!(regs->estat & BIT(INT_TI));
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, 1);
+
+ cfg = timer_get_cfg();
+ if (cfg & CSR_TCFG_PERIOD) {
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1);
+ if (shared_data->nr_iter == 0)
+ disable_timer();
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1
+ * On virtual machine, its value counts down from BIT_ULL(48) - 1
+ */
+ val = timer_get_val();
+ xcnt = timer_get_cycles();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(val > cfg,
+ "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx",
+ val, cfg, xcnt_diff_us);
+
+ csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_test_period_timer(u32 cpu)
+{
+ u32 irq_iter, config_iter;
+ u64 us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = test_args.nr_iter;
+ shared_data->xcnt = timer_get_cycles();
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ timer_set_next_cmp_ms(test_args.timer_period_ms, true);
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+ }
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(irq_iter == 0,
+ "irq_iter = 0x%x.\n"
+ " Guest period timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ irq_iter);
+}
+
+static void guest_test_oneshot_timer(u32 cpu)
+{
+ u32 irq_iter, config_iter;
+ u64 us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us;
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not triggered within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_test_emulate_timer(u32 cpu)
+{
+ u32 config_iter;
+ u64 xcnt_diff_us, us;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+ shared_data->nr_iter = 0;
+ us = msecs_to_usecs(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ shared_data->xcnt = timer_get_cycles();
+
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms, false);
+ do_idle();
+
+ xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt);
+ __GUEST_ASSERT(xcnt_diff_us >= us,
+ "xcnt_diff_us = 0x%lx, us = 0x%lx.\n",
+ xcnt_diff_us, us);
+ }
+ local_irq_enable();
+}
+
+static void guest_time_count_test(u32 cpu)
+{
+ u32 config_iter;
+ unsigned long start, end, prev, us;
+
+ /* Assuming that test case starts to run in 1 second */
+ start = timer_get_cycles();
+ us = msec_to_cycles(1000);
+ __GUEST_ASSERT(start <= us,
+ "start = 0x%lx, us = 0x%lx.\n",
+ start, us);
+
+ us = msec_to_cycles(test_args.timer_period_ms);
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ start = timer_get_cycles();
+ end = start + us;
+ /* test time count growing up always */
+ while (start < end) {
+ prev = start;
+ start = timer_get_cycles();
+ __GUEST_ASSERT(prev <= start,
+ "prev = 0x%lx, start = 0x%lx.\n",
+ prev, start);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ u32 cpu = guest_get_vcpuid();
+
+ /* must run at first */
+ guest_time_count_test(cpu);
+
+ timer_irq_enable();
+ local_irq_enable();
+ guest_test_period_timer(cpu);
+ guest_test_oneshot_timer(cpu);
+ guest_test_emulate_timer(cpu);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/loongarch/pmu_test.c b/tools/testing/selftests/kvm/loongarch/pmu_test.c
new file mode 100644
index 000000000000..ec3fefb9ea97
--- /dev/null
+++ b/tools/testing/selftests/kvm/loongarch/pmu_test.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LoongArch KVM PMU event counting test
+ *
+ * Test hardware event counting: CPU_CYCLES, INSTR_RETIRED,
+ * BRANCH_INSTRUCTIONS and BRANCH_MISSES.
+ */
+#include <linux/bitops.h>
+#include "kvm_util.h"
+#include "pmu.h"
+#include "loongarch/processor.h"
+
+static int pmu_irq_count;
+
+/* Check PMU support */
+static bool has_pmu_support(void)
+{
+ u32 cfg6;
+
+ /* Read CPUCFG6 to check PMU */
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+
+ /* Check PMU present bit */
+ if (!(cfg6 & CPUCFG6_PMP))
+ return false;
+
+ /* Check that at least one counter exists */
+ if (((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) == 0)
+ return false;
+
+ return true;
+}
+
+/* Dump PMU capabilities */
+static void dump_pmu_caps(void)
+{
+ u32 cfg6;
+ int nr_counters, counter_bits;
+
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+ nr_counters = ((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1;
+ counter_bits = ((cfg6 & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1;
+
+ pr_info("PMU capabilities:\n");
+ pr_info(" Counters present: %s\n", cfg6 & CPUCFG6_PMP ? "yes" : "no");
+ pr_info(" Number of counters: %d\n", nr_counters);
+ pr_info(" Counter width: %d bits\n", counter_bits);
+}
+
+/* Guest test code - runs inside VM */
+static void guest_pmu_base_test(void)
+{
+ int i;
+ u32 cfg6, pmnum;
+ u64 cnt[4];
+
+ cfg6 = read_cpucfg(LOONGARCH_CPUCFG6);
+ pmnum = (cfg6 >> 4) & 0xf;
+ GUEST_PRINTF("CPUCFG6 = 0x%x\n", cfg6);
+ GUEST_PRINTF("PMP enabled: %s\n", (cfg6 & 0x1) ? "YES" : "NO");
+ GUEST_PRINTF("Number of counters (PMNUM): %x\n", pmnum + 1);
+ GUEST_ASSERT(pmnum == 3);
+
+ GUEST_PRINTF("Clean csr_perfcntr0-3\n");
+ csr_write(0, LOONGARCH_CSR_PERFCNTR0);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR1);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR2);
+ csr_write(0, LOONGARCH_CSR_PERFCNTR3);
+ GUEST_PRINTF("Set csr_perfctrl0 for cycles event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0);
+ GUEST_PRINTF("Set csr_perfctrl1 for instr_retired event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ LOONGARCH_PMU_EVENT_INSTR_RETIRED, LOONGARCH_CSR_PERFCTRL1);
+ GUEST_PRINTF("Set csr_perfctrl2 for branch_instructions event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LOONGARCH_CSR_PERFCTRL2);
+ GUEST_PRINTF("Set csr_perfctrl3 for branch_misses event\n");
+ csr_write(PMU_ENVENT_ENABLED |
+ PERF_COUNT_HW_BRANCH_MISSES, LOONGARCH_CSR_PERFCTRL3);
+
+ for (i = 0; i < NUM_LOOPS; i++)
+ cpu_relax();
+
+ cnt[0] = csr_read(LOONGARCH_CSR_PERFCNTR0);
+ GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt[0]);
+ cnt[1] = csr_read(LOONGARCH_CSR_PERFCNTR1);
+ GUEST_PRINTF("csr_perfcntr1 is %lx\n", cnt[1]);
+ cnt[2] = csr_read(LOONGARCH_CSR_PERFCNTR2);
+ GUEST_PRINTF("csr_perfcntr2 is %lx\n", cnt[2]);
+ cnt[3] = csr_read(LOONGARCH_CSR_PERFCNTR3);
+ GUEST_PRINTF("csr_perfcntr3 is %lx\n", cnt[3]);
+
+ GUEST_PRINTF("assert csr_perfcntr0 >EXPECTED_CYCLES_MIN && csr_perfcntr0 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[0] > EXPECTED_CYCLES_MIN && cnt[0] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr1 > EXPECTED_INSTR_MIN && csr_perfcntr1 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[1] > EXPECTED_INSTR_MIN && cnt[1] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr2 > 0 && csr_perfcntr2 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[2] > 0 && cnt[2] < UPPER_BOUND);
+ GUEST_PRINTF("assert csr_perfcntr3 > 0 && csr_perfcntr3 < UPPER_BOUND\n");
+ GUEST_ASSERT(cnt[3] > 0 && cnt[3] < UPPER_BOUND);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid;
+
+ pmu_irq_disable();
+ intid = !!(regs->estat & BIT(INT_PMI));
+ GUEST_ASSERT_EQ(intid, 1);
+ GUEST_PRINTF("Get PMU interrupt\n");
+ WRITE_ONCE(pmu_irq_count, pmu_irq_count + 1);
+}
+
+static void guest_pmu_interrupt_test(void)
+{
+ u64 cnt;
+
+ csr_write(PMU_OVERFLOW - 1, LOONGARCH_CSR_PERFCNTR0);
+ csr_write(PMU_ENVENT_ENABLED | CSR_PERFCTRL_PMIE | LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0);
+
+ cpu_relax();
+
+ GUEST_ASSERT_EQ(pmu_irq_count, 1);
+ cnt = csr_read(LOONGARCH_CSR_PERFCNTR0);
+ GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt);
+ GUEST_PRINTF("PMU interrupt test success\n");
+
+}
+
+static void guest_code(void)
+{
+ guest_pmu_base_test();
+
+ pmu_irq_enable();
+ local_irq_enable();
+ guest_pmu_interrupt_test();
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ struct kvm_device_attr attr;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ /* Check host KVM PMU support */
+ if (!has_pmu_support()) {
+ print_skip("PMU not supported by host hardware\n");
+ dump_pmu_caps();
+ return KSFT_SKIP;
+ }
+ pr_info("Host support PMU\n");
+
+ /* Dump PMU capabilities */
+ dump_pmu_caps();
+
+ vm = vm_create(VM_MODE_P47V47_16K);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ pmu_irq_count = 0;
+ vm_init_descriptor_tables(vm);
+ loongarch_vcpu_setup(vcpu);
+ vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler);
+ sync_global_to_guest(vm, pmu_irq_count);
+
+ attr.group = KVM_LOONGARCH_VM_FEAT_CTRL,
+ attr.attr = KVM_LOONGARCH_VM_FEAT_PMU,
+
+ ret = ioctl(vm->fd, KVM_HAS_DEVICE_ATTR, &attr);
+
+ if (ret == 0) {
+ pr_info("PMU is enabled in VM\n");
+ } else {
+ print_skip("PMU not enabled by VM config\n");
+ return KSFT_SKIP;
+ }
+
+ while (1) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PRINTF:
+ printf("%s", (const char *)uc.buffer);
+ break;
+ case UCALL_DONE:
+ printf("PMU test PASSED\n");
+ goto done;
+ case UCALL_ABORT:
+ printf("PMU test FAILED\n");
+ ret = -1;
+ goto done;
+ default:
+ printf("Unexpected exit\n");
+ ret = -1;
+ goto done;
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return ret;
+}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index c81a84990eab..9c7578a098c3 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -22,6 +22,7 @@
#include "processor.h"
#include "test_util.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define DUMMY_MEMSLOT_INDEX 7
@@ -29,7 +30,7 @@
static int nr_vcpus = 1;
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
@@ -54,10 +55,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
}
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
- uint64_t nr_modifications)
+ u64 nr_modifications)
{
- uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size;
- uint64_t gpa;
+ u64 pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size;
+ gpa_t gpa;
int i;
/*
@@ -77,7 +78,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
struct test_params {
useconds_t delay;
- uint64_t nr_iterations;
+ u64 nr_iterations;
bool partition_vcpu_memory_access;
bool disable_slot_zap_quirk;
};
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index e3711beff7f3..e977e979470f 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -15,7 +15,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
@@ -23,8 +22,10 @@
#include <linux/sizes.h>
#include <test_util.h>
+#include <kvm_syscalls.h>
#include <kvm_util.h>
#include <processor.h>
+#include <ucall_common.h>
#define MEM_EXTRA_SIZE SZ_64K
@@ -84,17 +85,17 @@ struct vm_data {
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
pthread_t vcpu_thread;
- uint32_t nslots;
- uint64_t npages;
- uint64_t pages_per_slot;
+ u32 nslots;
+ u64 npages;
+ u64 pages_per_slot;
void **hva_slots;
bool mmio_ok;
- uint64_t mmio_gpa_min;
- uint64_t mmio_gpa_max;
+ u64 mmio_gpa_min;
+ u64 mmio_gpa_max;
};
struct sync_area {
- uint32_t guest_page_size;
+ u32 guest_page_size;
atomic_bool start_flag;
atomic_bool exit_flag;
atomic_bool sync_flag;
@@ -185,12 +186,12 @@ static void wait_for_vcpu(void)
"sem_timedwait() failed: %d", errno);
}
-static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+static void *vm_gpa2hva(struct vm_data *data, gpa_t gpa, u64 *rempages)
{
- uint64_t gpage, pgoffs;
- uint32_t slot, slotoffs;
+ gpa_t gpage, pgoffs;
+ u32 slot, slotoffs;
void *base;
- uint32_t guest_page_size = data->vm->page_size;
+ u32 guest_page_size = data->vm->page_size;
TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
@@ -199,11 +200,11 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
gpage = gpa / guest_page_size;
pgoffs = gpa % guest_page_size;
- slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+ slot = min(gpage / data->pages_per_slot, (u64)data->nslots - 1);
slotoffs = gpage - (slot * data->pages_per_slot);
if (rempages) {
- uint64_t slotpages;
+ u64 slotpages;
if (slot == data->nslots - 1)
slotpages = data->npages - slot * data->pages_per_slot;
@@ -216,12 +217,12 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
}
base = data->hva_slots[slot];
- return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
+ return (u8 *)base + slotoffs * guest_page_size + pgoffs;
}
-static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+static u64 vm_slot2gpa(struct vm_data *data, u32 slot)
{
- uint32_t guest_page_size = data->vm->page_size;
+ u32 guest_page_size = data->vm->page_size;
TEST_ASSERT(slot < data->nslots, "Too high slot number");
@@ -242,8 +243,8 @@ static struct vm_data *alloc_vm(void)
return data;
}
-static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
- uint64_t pages_per_slot, uint64_t rempages)
+static bool check_slot_pages(u32 host_page_size, u32 guest_page_size,
+ u64 pages_per_slot, u64 rempages)
{
if (!pages_per_slot)
return false;
@@ -258,11 +259,11 @@ static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
}
-static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
+static u64 get_max_slots(struct vm_data *data, u32 host_page_size)
{
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t mempages, pages_per_slot, rempages;
- uint64_t slots;
+ u32 guest_page_size = data->vm->page_size;
+ u64 mempages, pages_per_slot, rempages;
+ u64 slots;
mempages = data->npages;
slots = data->nslots;
@@ -280,13 +281,13 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
return 0;
}
-static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
- void *guest_code, uint64_t mem_size,
+static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots,
+ void *guest_code, u64 mem_size,
struct timespec *slot_runtime)
{
- uint64_t mempages, rempages;
- uint64_t guest_addr;
- uint32_t slot, host_page_size, guest_page_size;
+ u64 mempages, rempages;
+ u64 guest_addr;
+ u32 slot, host_page_size, guest_page_size;
struct timespec tstart;
struct sync_area *sync;
@@ -316,7 +317,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
clock_gettime(CLOCK_MONOTONIC, &tstart);
for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
- uint64_t npages;
+ u64 npages;
npages = data->pages_per_slot;
if (slot == data->nslots)
@@ -330,8 +331,8 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
*slot_runtime = timespec_elapsed(tstart);
for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
- uint64_t npages;
- uint64_t gpa;
+ u64 npages;
+ gpa_t gpa;
npages = data->pages_per_slot;
if (slot == data->nslots)
@@ -447,7 +448,7 @@ static bool guest_perform_sync(void)
static void guest_code_test_memslot_move(void)
{
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
- uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+ u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
GUEST_SYNC(0);
@@ -459,7 +460,7 @@ static void guest_code_test_memslot_move(void)
for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
ptr += page_size)
- *(uint64_t *)ptr = MEM_TEST_VAL_1;
+ *(u64 *)ptr = MEM_TEST_VAL_1;
/*
* No host sync here since the MMIO exits are so expensive
@@ -476,7 +477,7 @@ static void guest_code_test_memslot_move(void)
static void guest_code_test_memslot_map(void)
{
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
- uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+ u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
GUEST_SYNC(0);
@@ -488,7 +489,7 @@ static void guest_code_test_memslot_map(void)
for (ptr = MEM_TEST_GPA;
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
ptr += page_size)
- *(uint64_t *)ptr = MEM_TEST_VAL_1;
+ *(u64 *)ptr = MEM_TEST_VAL_1;
if (!guest_perform_sync())
break;
@@ -496,7 +497,7 @@ static void guest_code_test_memslot_map(void)
for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
ptr += page_size)
- *(uint64_t *)ptr = MEM_TEST_VAL_2;
+ *(u64 *)ptr = MEM_TEST_VAL_2;
if (!guest_perform_sync())
break;
@@ -525,13 +526,13 @@ static void guest_code_test_memslot_unmap(void)
*
* Just access a single page to be on the safe side.
*/
- *(uint64_t *)ptr = MEM_TEST_VAL_1;
+ *(u64 *)ptr = MEM_TEST_VAL_1;
if (!guest_perform_sync())
break;
ptr += MEM_TEST_UNMAP_SIZE / 2;
- *(uint64_t *)ptr = MEM_TEST_VAL_2;
+ *(u64 *)ptr = MEM_TEST_VAL_2;
if (!guest_perform_sync())
break;
@@ -543,7 +544,7 @@ static void guest_code_test_memslot_unmap(void)
static void guest_code_test_memslot_rw(void)
{
struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
- uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+ u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
GUEST_SYNC(0);
@@ -554,17 +555,17 @@ static void guest_code_test_memslot_rw(void)
for (ptr = MEM_TEST_GPA;
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
- *(uint64_t *)ptr = MEM_TEST_VAL_1;
+ *(u64 *)ptr = MEM_TEST_VAL_1;
if (!guest_perform_sync())
break;
for (ptr = MEM_TEST_GPA + page_size / 2;
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
- uint64_t val = *(uint64_t *)ptr;
+ u64 val = *(u64 *)ptr;
GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
- *(uint64_t *)ptr = 0;
+ *(u64 *)ptr = 0;
}
if (!guest_perform_sync())
@@ -576,10 +577,10 @@ static void guest_code_test_memslot_rw(void)
static bool test_memslot_move_prepare(struct vm_data *data,
struct sync_area *sync,
- uint64_t *maxslots, bool isactive)
+ u64 *maxslots, bool isactive)
{
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t movesrcgpa, movetestgpa;
+ u32 guest_page_size = data->vm->page_size;
+ u64 movesrcgpa, movetestgpa;
#ifdef __x86_64__
if (disable_slot_zap_quirk)
@@ -589,7 +590,7 @@ static bool test_memslot_move_prepare(struct vm_data *data,
movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
if (isactive) {
- uint64_t lastpages;
+ u64 lastpages;
vm_gpa2hva(data, movesrcgpa, &lastpages);
if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
@@ -612,21 +613,21 @@ static bool test_memslot_move_prepare(struct vm_data *data,
static bool test_memslot_move_prepare_active(struct vm_data *data,
struct sync_area *sync,
- uint64_t *maxslots)
+ u64 *maxslots)
{
return test_memslot_move_prepare(data, sync, maxslots, true);
}
static bool test_memslot_move_prepare_inactive(struct vm_data *data,
struct sync_area *sync,
- uint64_t *maxslots)
+ u64 *maxslots)
{
return test_memslot_move_prepare(data, sync, maxslots, false);
}
static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
{
- uint64_t movesrcgpa;
+ u64 movesrcgpa;
movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
vm_mem_region_move(data->vm, data->nslots - 1 + 1,
@@ -635,13 +636,13 @@ static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
}
static void test_memslot_do_unmap(struct vm_data *data,
- uint64_t offsp, uint64_t count)
+ u64 offsp, u64 count)
{
- uint64_t gpa, ctr;
- uint32_t guest_page_size = data->vm->page_size;
+ gpa_t gpa, ctr;
+ u32 guest_page_size = data->vm->page_size;
for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
- uint64_t npages;
+ u64 npages;
void *hva;
int ret;
@@ -660,11 +661,11 @@ static void test_memslot_do_unmap(struct vm_data *data,
}
static void test_memslot_map_unmap_check(struct vm_data *data,
- uint64_t offsp, uint64_t valexp)
+ u64 offsp, u64 valexp)
{
- uint64_t gpa;
- uint64_t *val;
- uint32_t guest_page_size = data->vm->page_size;
+ gpa_t gpa;
+ u64 *val;
+ u32 guest_page_size = data->vm->page_size;
if (!map_unmap_verify)
return;
@@ -679,8 +680,8 @@ static void test_memslot_map_unmap_check(struct vm_data *data,
static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
{
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
+ u32 guest_page_size = data->vm->page_size;
+ u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
/*
* Unmap the second half of the test area while guest writes to (maps)
@@ -717,11 +718,11 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
static void test_memslot_unmap_loop_common(struct vm_data *data,
struct sync_area *sync,
- uint64_t chunk)
+ u64 chunk)
{
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
- uint64_t ctr;
+ u32 guest_page_size = data->vm->page_size;
+ u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
+ u64 ctr;
/*
* Wait for the guest to finish mapping page(s) in the first half
@@ -745,9 +746,9 @@ static void test_memslot_unmap_loop_common(struct vm_data *data,
static void test_memslot_unmap_loop(struct vm_data *data,
struct sync_area *sync)
{
- uint32_t host_page_size = getpagesize();
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
+ u32 host_page_size = getpagesize();
+ u32 guest_page_size = data->vm->page_size;
+ u64 guest_chunk_pages = guest_page_size >= host_page_size ?
1 : host_page_size / guest_page_size;
test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
@@ -756,27 +757,27 @@ static void test_memslot_unmap_loop(struct vm_data *data,
static void test_memslot_unmap_loop_chunked(struct vm_data *data,
struct sync_area *sync)
{
- uint32_t guest_page_size = data->vm->page_size;
- uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
+ u32 guest_page_size = data->vm->page_size;
+ u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
}
static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
{
- uint64_t gptr;
- uint32_t guest_page_size = data->vm->page_size;
+ u64 gptr;
+ u32 guest_page_size = data->vm->page_size;
for (gptr = MEM_TEST_GPA + guest_page_size / 2;
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
- *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
+ *(u64 *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
host_perform_sync(sync);
for (gptr = MEM_TEST_GPA;
gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
- uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
- uint64_t val = *vptr;
+ u64 *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+ u64 val = *vptr;
TEST_ASSERT(val == MEM_TEST_VAL_1,
"Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
@@ -789,21 +790,21 @@ static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
struct test_data {
const char *name;
- uint64_t mem_size;
+ u64 mem_size;
void (*guest_code)(void);
bool (*prepare)(struct vm_data *data, struct sync_area *sync,
- uint64_t *maxslots);
+ u64 *maxslots);
void (*loop)(struct vm_data *data, struct sync_area *sync);
};
-static bool test_execute(int nslots, uint64_t *maxslots,
+static bool test_execute(int nslots, u64 *maxslots,
unsigned int maxtime,
const struct test_data *tdata,
- uint64_t *nloops,
+ u64 *nloops,
struct timespec *slot_runtime,
struct timespec *guest_runtime)
{
- uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
+ u64 mem_size = tdata->mem_size ? : MEM_SIZE;
struct vm_data *data;
struct sync_area *sync;
struct timespec tstart;
@@ -923,8 +924,8 @@ static void help(char *name, struct test_args *targs)
static bool check_memory_sizes(void)
{
- uint32_t host_page_size = getpagesize();
- uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ u32 host_page_size = getpagesize();
+ u32 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
@@ -960,7 +961,7 @@ static bool check_memory_sizes(void)
static bool parse_args(int argc, char *argv[],
struct test_args *targs)
{
- uint32_t max_mem_slots;
+ u32 max_mem_slots;
int opt;
while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
@@ -1039,8 +1040,8 @@ static bool parse_args(int argc, char *argv[],
struct test_result {
struct timespec slot_runtime, guest_runtime, iter_runtime;
- int64_t slottimens, runtimens;
- uint64_t nloops;
+ s64 slottimens, runtimens;
+ u64 nloops;
};
static bool test_loop(const struct test_data *data,
@@ -1048,7 +1049,7 @@ static bool test_loop(const struct test_data *data,
struct test_result *rbestslottime,
struct test_result *rbestruntime)
{
- uint64_t maxslots;
+ u64 maxslots;
struct test_result result = {};
if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 6a437d2be9fa..54d281419d31 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -20,19 +20,19 @@
static bool mprotect_ro_done;
static bool all_vcpus_hit_ro_fault;
-static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
+static void guest_code(u64 start_gpa, u64 end_gpa, u64 stride)
{
- uint64_t gpa;
+ gpa_t gpa;
int i;
for (i = 0; i < 2; i++) {
for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa);
GUEST_SYNC(i);
}
for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- *((volatile uint64_t *)gpa);
+ *((volatile u64 *)gpa);
GUEST_SYNC(2);
/*
@@ -55,7 +55,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
#elif defined(__aarch64__)
asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
#else
- vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa);
#endif
} while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
@@ -68,7 +68,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
#endif
for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
- vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+ vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa);
GUEST_SYNC(4);
GUEST_ASSERT(0);
@@ -76,8 +76,8 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
struct vcpu_info {
struct kvm_vcpu *vcpu;
- uint64_t start_gpa;
- uint64_t end_gpa;
+ u64 start_gpa;
+ u64 end_gpa;
};
static int nr_vcpus;
@@ -203,10 +203,10 @@ static void *vcpu_worker(void *data)
}
static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus,
- uint64_t start_gpa, uint64_t end_gpa)
+ u64 start_gpa, u64 end_gpa)
{
struct vcpu_info *info;
- uint64_t gpa, nr_bytes;
+ gpa_t gpa, nr_bytes;
pthread_t *threads;
int i;
@@ -217,7 +217,7 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus,
TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");
nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
- ~((uint64_t)vm->page_size - 1);
+ ~((u64)vm->page_size - 1);
TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);
for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
@@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void)
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
errno, strerror(errno));
- nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ nr_vcpus = CPU_COUNT(&possible_mask);
TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+ if (nr_vcpus >= 2)
+ nr_vcpus = nr_vcpus * 3/4;
}
int main(int argc, char *argv[])
@@ -276,11 +278,11 @@ int main(int argc, char *argv[])
* just below the 4gb boundary. This test could create memory at
* 1gb-3gb,but it's simpler to skip straight to 4gb.
*/
- const uint64_t start_gpa = SZ_4G;
+ const u64 start_gpa = SZ_4G;
const int first_slot = 1;
struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
- uint64_t max_gpa, gpa, slot_size, max_mem, i;
+ u64 max_gpa, gpa, slot_size, max_mem, i;
int max_slots, slot, opt, fd;
bool hugepages = false;
struct kvm_vcpu **vcpus;
@@ -339,14 +341,13 @@ int main(int argc, char *argv[])
TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
fd = kvm_memfd_alloc(slot_size, hugepages);
- mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+ mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
/* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
for (i = 0; i < slot_size; i += vm->page_size)
- ((uint8_t *)mem)[i] = 0xaa;
+ ((u8 *)mem)[i] = 0xaa;
gpa = 0;
for (slot = first_slot; slot < max_slots; slot++) {
@@ -361,11 +362,9 @@ int main(int argc, char *argv[])
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
- for (i = 0; i < slot_size; i += SZ_1G)
- __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+ virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G);
#else
- for (i = 0; i < slot_size; i += vm->page_size)
- virt_pg_map(vm, gpa + i, gpa + i);
+ virt_map(vm, gpa, gpa, slot_size >> vm->page_shift);
#endif
}
@@ -413,7 +412,7 @@ int main(int argc, char *argv[])
for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
- munmap(mem, slot_size / 2);
+ kvm_munmap(mem, slot_size / 2);
/* Sanity check that the vCPUs actually ran. */
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
index 0350a8896a2f..fcb57fd034e6 100644
--- a/tools/testing/selftests/kvm/pre_fault_memory_test.c
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -10,19 +10,20 @@
#include <test_util.h>
#include <kvm_util.h>
#include <processor.h>
+#include <pthread.h>
/* Arbitrarily chosen values */
#define TEST_SIZE (SZ_2M + PAGE_SIZE)
#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
#define TEST_SLOT 10
-static void guest_code(uint64_t base_gpa)
+static void guest_code(u64 base_gva)
{
- volatile uint64_t val __used;
+ volatile u64 val __used;
int i;
for (i = 0; i < TEST_NPAGES; i++) {
- uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+ u64 *src = (u64 *)(base_gva + i * PAGE_SIZE);
val = *src;
}
@@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa)
GUEST_DONE();
}
-static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
- u64 left)
+struct slot_worker_data {
+ struct kvm_vm *vm;
+ gpa_t gpa;
+ u32 flags;
+ bool worker_ready;
+ bool prefault_ready;
+ bool recreate_slot;
+};
+
+static void *delete_slot_worker(void *__data)
+{
+ struct slot_worker_data *data = __data;
+ struct kvm_vm *vm = data->vm;
+
+ WRITE_ONCE(data->worker_ready, true);
+
+ while (!READ_ONCE(data->prefault_ready))
+ cpu_relax();
+
+ vm_mem_region_delete(vm, TEST_SLOT);
+
+ while (!READ_ONCE(data->recreate_slot))
+ cpu_relax();
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa,
+ TEST_SLOT, TEST_NPAGES, data->flags);
+
+ return NULL;
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset,
+ u64 size, u64 expected_left, bool private)
{
struct kvm_pre_fault_memory range = {
- .gpa = gpa,
+ .gpa = base_gpa + offset,
.size = size,
.flags = 0,
};
- u64 prev;
+ struct slot_worker_data data = {
+ .vm = vcpu->vm,
+ .gpa = base_gpa,
+ .flags = private ? KVM_MEM_GUEST_MEMFD : 0,
+ };
+ bool slot_recreated = false;
+ pthread_t slot_worker;
int ret, save_errno;
+ u64 prev;
+
+ /*
+ * Concurrently delete (and recreate) the slot to test KVM's handling
+ * of a racing memslot deletion with prefaulting.
+ */
+ pthread_create(&slot_worker, NULL, delete_slot_worker, &data);
+
+ while (!READ_ONCE(data.worker_ready))
+ cpu_relax();
- do {
+ WRITE_ONCE(data.prefault_ready, true);
+
+ for (;;) {
prev = range.size;
ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
save_errno = errno;
@@ -49,22 +98,70 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
"%sexpecting range.size to change on %s",
ret < 0 ? "not " : "",
ret < 0 ? "failure" : "success");
- } while (ret >= 0 ? range.size : save_errno == EINTR);
- TEST_ASSERT(range.size == left,
- "Completed with %lld bytes left, expected %" PRId64,
- range.size, left);
+ /*
+ * Immediately retry prefaulting if KVM was interrupted by an
+ * unrelated signal/event.
+ */
+ if (ret < 0 && save_errno == EINTR)
+ continue;
+
+ /*
+ * Tell the worker to recreate the slot in order to complete
+ * prefaulting (if prefault didn't already succeed before the
+ * slot was deleted) and/or to prepare for the next testcase.
+ * Wait for the worker to exit so that the next invocation of
+ * prefaulting is guaranteed to complete (assuming no KVM bugs).
+ */
+ if (!slot_recreated) {
+ WRITE_ONCE(data.recreate_slot, true);
+ pthread_join(slot_worker, NULL);
+ slot_recreated = true;
+
+ /*
+ * Retry prefaulting to get a stable result, i.e. to
+ * avoid seeing random EAGAIN failures. Don't retry if
+ * prefaulting already succeeded, as KVM disallows
+ * prefaulting with size=0, i.e. blindly retrying would
+ * result in test failures due to EINVAL. KVM should
+ * always return success if all bytes are prefaulted,
+ * i.e. there is no need to guard against EAGAIN being
+ * returned.
+ */
+ if (range.size)
+ continue;
+ }
+
+ /*
+ * All done if there are no remaining bytes to prefault, or if
+ * prefaulting failed (EINTR was handled above, and EAGAIN due
+ * to prefaulting a memslot that's being actively deleted should
+ * be impossible since the memslot has already been recreated).
+ */
+ if (!range.size || ret < 0)
+ break;
+ }
- if (left == 0)
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT(range.size == expected_left,
+ "Completed with %llu bytes left, expected %lu",
+ range.size, expected_left);
+
+ /*
+ * Assert success if prefaulting the entire range should succeed, i.e.
+ * complete with no bytes remaining. Otherwise prefaulting should have
+ * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when
+ * no memslot exists).
+ */
+ if (!expected_left)
+ TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
else
- /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
- __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
- "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ KVM_PRE_FAULT_MEMORY, ret, vcpu->vm);
}
static void __test_pre_fault_memory(unsigned long vm_type, bool private)
{
+ gpa_t gpa, gva, alignment, guest_page_size;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
.type = vm_type,
@@ -74,34 +171,26 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private)
struct kvm_vm *vm;
struct ucall uc;
- uint64_t guest_test_phys_mem;
- uint64_t guest_test_virt_mem;
- uint64_t alignment, guest_page_size;
-
vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
- guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
-#ifdef __s390x__
- alignment = max(0x100000UL, guest_page_size);
-#else
+ gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
alignment = SZ_2M;
-#endif
- guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
- guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+ gpa = align_down(gpa, alignment);
+ gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
- private ? KVM_MEM_GUEST_MEMFD : 0);
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT,
+ TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, gva, gpa, TEST_NPAGES);
if (private)
- vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
- pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
- pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+ vm_mem_set_private(vm, gpa, TEST_SIZE);
+
+ pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private);
+ pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private);
+ pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private);
- vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_args_set(vcpu, 1, gva);
vcpu_run(vcpu);
run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 9e370800a6a2..d67c918ee310 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -15,11 +15,11 @@
static int timer_irq = IRQ_S_TIMER;
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
- uint64_t xcnt, xcnt_diff_us, cmp;
+ u64 xcnt, xcnt_diff_us, cmp;
unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
timer_irq_disable();
@@ -40,7 +40,7 @@ static void guest_irq_handler(struct ex_regs *regs)
static void guest_run(struct test_vcpu_shared_data *shared_data)
{
- uint32_t irq_iter, config_iter;
+ u32 irq_iter, config_iter;
shared_data->nr_iter = 0;
shared_data->guest_stage = 0;
@@ -66,7 +66,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data)
static void guest_code(void)
{
- uint32_t cpu = guest_get_vcpuid();
+ u32 cpu = guest_get_vcpuid();
struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
timer_irq_disable();
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index cfed6c727bfc..3f44b045a22e 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -8,10 +8,10 @@
#include "kvm_util.h"
#include "ucall_common.h"
-#define LABEL_ADDRESS(v) ((uint64_t)&(v))
+#define LABEL_ADDRESS(v) ((u64)&(v))
extern unsigned char sw_bp_1, sw_bp_2;
-static uint64_t sw_bp_addr;
+static u64 sw_bp_addr;
static void guest_code(void)
{
@@ -27,7 +27,7 @@ static void guest_code(void)
GUEST_DONE();
}
-static void guest_breakpoint_handler(struct ex_regs *regs)
+static void guest_breakpoint_handler(struct pt_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
@@ -37,7 +37,7 @@ int main(void)
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- uint64_t pc;
+ u64 pc;
struct kvm_guest_debug debug = {
.control = KVM_GUESTDBG_ENABLE,
};
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8515921dfdbf..8d6fdb5d38b8 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -17,6 +17,15 @@ enum {
VCPU_FEATURE_SBI_EXT,
};
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
@@ -53,8 +62,11 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALASR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
@@ -67,11 +79,14 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCLSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
@@ -81,6 +96,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZILSD:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
@@ -92,6 +108,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
@@ -117,6 +135,8 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
return true;
@@ -141,11 +161,43 @@ bool check_reject_set(int err)
return err == EINVAL;
}
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ u64 feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
struct vcpu_reg_sublist *s;
- uint64_t feature;
+ u64 feature;
int rc;
for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
@@ -170,6 +222,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
if (!s->feature)
continue;
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
switch (s->feature_type) {
case VCPU_FEATURE_ISA_EXT:
feature = RISCV_ISA_EXT_REG(s->feature);
@@ -184,6 +243,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Try to enable the desired extension */
__vcpu_set_reg(vcpu, feature, 1);
+skip:
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
"%s not available, skipping tests", s->name);
@@ -204,6 +264,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)";
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
case KVM_REG_RISCV_CONFIG_REG(marchid):
@@ -408,6 +470,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
@@ -434,8 +525,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALASR),
+ KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
@@ -448,11 +542,14 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFBFMIN),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOP),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
@@ -462,6 +559,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
@@ -473,6 +571,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZTSO),
KVM_ISA_EXT_ARR(ZVBB),
KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFBFMIN),
+ KVM_ISA_EXT_ARR(ZVFBFWMA),
KVM_ISA_EXT_ARR(ZVFH),
KVM_ISA_EXT_ARR(ZVFHMIN),
KVM_ISA_EXT_ARR(ZVKB),
@@ -545,6 +645,8 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
};
@@ -601,6 +703,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off)
return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off);
}
+static const char *sbi_fwft_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)";
+ case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)";
+ case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)";
+ case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)";
+ case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)";
+ case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off);
+}
+
static const char *sbi_id_to_str(const char *prefix, __u64 id)
{
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE);
@@ -613,6 +728,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id)
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_STA:
return sbi_sta_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_FWFT:
+ return sbi_fwft_id_to_str(reg_off);
}
return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
@@ -635,6 +752,9 @@ void print_reg(const char *prefix, __u64 id)
case KVM_REG_SIZE_U128:
reg_size = "KVM_REG_SIZE_U128";
break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
default:
printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
(id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
@@ -666,6 +786,10 @@ void print_reg(const char *prefix, __u64 id)
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
reg_size, fp_d_id_to_str(prefix, id));
break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
reg_size, isa_ext_id_to_str(prefix, id));
@@ -691,10 +815,13 @@ void print_reg(const char *prefix, __u64 id)
*/
static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
@@ -770,11 +897,26 @@ static __u64 sbi_sta_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
};
+static __u64 sbi_fwft_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value),
+};
+
static __u64 zicbom_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM,
};
+static __u64 zicbop_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP,
+};
+
static __u64 zicboz_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ,
@@ -870,6 +1012,48 @@ static __u64 fp_d_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
};
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
@@ -879,8 +1063,13 @@ static __u64 fp_d_regs[] = {
#define SUBLIST_SBI_STA \
{"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
.regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
+#define SUBLIST_SBI_FWFT \
+ {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \
+ .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),}
#define SUBLIST_ZICBOM \
{"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define SUBLIST_ZICBOP \
+ {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),}
#define SUBLIST_ZICBOZ \
{"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
#define SUBLIST_AIA \
@@ -894,6 +1083,9 @@ static __u64 fp_d_regs[] = {
{"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
.regs_n = ARRAY_SIZE(fp_d_regs),}
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
static __u64 regs_##ext[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
@@ -958,10 +1150,13 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
+KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY);
+KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
@@ -974,8 +1169,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalasr, ZALASR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
@@ -988,11 +1186,14 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
+KVM_ISA_EXT_SIMPLE_CONFIG(zclsd, ZCLSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP);
KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
@@ -1002,6 +1203,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zilsd, ZILSD);
KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
@@ -1013,6 +1215,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
@@ -1030,10 +1234,13 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_sbi_pmu,
&config_sbi_dbcn,
&config_sbi_susp,
+ &config_sbi_mpxy,
+ &config_sbi_fwft,
&config_aia,
&config_fp_f,
&config_fp_d,
&config_h,
+ &config_v,
&config_smnpm,
&config_smstateen,
&config_sscofpmf,
@@ -1045,8 +1252,11 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svnapot,
&config_svpbmt,
&config_svvptc,
+ &config_zaamo,
&config_zabha,
&config_zacas,
+ &config_zalrsc,
+ &config_zalasr,
&config_zawrs,
&config_zba,
&config_zbb,
@@ -1059,11 +1269,14 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zcb,
&config_zcd,
&config_zcf,
+ &config_zclsd,
&config_zcmop,
&config_zfa,
+ &config_zfbfmin,
&config_zfh,
&config_zfhmin,
&config_zicbom,
+ &config_zicbop,
&config_zicboz,
&config_ziccrse,
&config_zicntr,
@@ -1073,6 +1286,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zihintntl,
&config_zihintpause,
&config_zihpm,
+ &config_zilsd,
&config_zimop,
&config_zknd,
&config_zkne,
@@ -1084,6 +1298,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_ztso,
&config_zvbb,
&config_zvbc,
+ &config_zvfbfmin,
+ &config_zvfbfwma,
&config_zvfh,
&config_zvfhmin,
&config_zvkb,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f45c0ecc902d..e56a3dd6a51e 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -24,7 +24,7 @@ union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
/* Snapshot shared memory data */
#define PMU_SNAPSHOT_GPA_BASE BIT(30)
static void *snapshot_gva;
-static vm_paddr_t snapshot_gpa;
+static gpa_t snapshot_gpa;
static int vcpu_shared_irq_count;
static int counter_in_use;
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
unsigned long pmu_csr_read_num(int csr_num)
{
@@ -67,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num)
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
- switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
@@ -81,7 +86,7 @@ unsigned long pmu_csr_read_num(int csr_num)
#undef switchcase_csr_read
}
-static inline void dummy_func_loop(uint64_t iter)
+static inline void dummy_func_loop(u64 iter)
{
int i = 0;
@@ -118,26 +123,44 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
- __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
- counter, ret.error);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
}
-static void guest_illegal_exception_handler(struct ex_regs *regs)
+static void guest_illegal_exception_handler(struct pt_regs *regs)
{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
- unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +174,6 @@ static void guest_irq_handler(struct ex_regs *regs)
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
- counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
- /* Now start the counter to mimick the real driver behavior */
- start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -240,7 +259,7 @@ static inline void verify_sbi_requirement_assert(void)
__GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
}
-static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
+static void snapshot_set_shmem(gpa_t gpa, unsigned long flags)
{
unsigned long lo = (unsigned long)gpa;
#if __riscv_xlen == 32
@@ -417,6 +436,7 @@ static void test_pmu_basic_sanity(void)
struct sbiret ret;
int num_counters = 0, i;
union sbi_pmu_ctr_info ctrinfo;
+ unsigned long fw_eidx;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
@@ -442,7 +462,24 @@ static void test_pmu_basic_sanity(void)
pmu_csr_read_num(ctrinfo.csr);
GUEST_ASSERT(illegal_handler_invoked);
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
- read_fw_counter(i, ctrinfo);
+ /* Read without configure should fail */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
+ i, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM);
+
+ /*
+ * Try to configure with a common firmware event.
+ * If configuration succeeds, verify we can read it.
+ */
+ fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) |
+ SBI_PMU_FW_ACCESS_LOAD;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ i, 1, 0, fw_eidx, 0, 0);
+ if (ret.error == 0) {
+ GUEST_ASSERT(ret.value == i);
+ read_fw_counter(i, ctrinfo);
+ }
}
}
@@ -479,7 +516,7 @@ static void test_pmu_events_snaphost(void)
static void test_pmu_events_overflow(void)
{
- int num_counters = 0;
+ int num_counters = 0, i = 0;
/* Verify presence of SBI PMU and minimum requrired SBI version */
verify_sbi_requirement_assert();
@@ -496,11 +533,15 @@ static void test_pmu_events_overflow(void)
* Qemu supports overflow for cycle/instruction.
* This test may fail on any platform that do not support overflow for these two events.
*/
- test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
- test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+ vcpu_shared_irq_count = 0;
+
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
GUEST_DONE();
}
@@ -569,7 +610,7 @@ static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
- snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
+ snapshot_gpa = addr_gva2gpa(vcpu->vm, (gva_t)snapshot_gva);
sync_global_to_guest(vcpu->vm, snapshot_gva);
sync_global_to_guest(vcpu->vm, snapshot_gpa);
}
@@ -609,7 +650,11 @@ static void test_vm_events_overflow(void *guest_code)
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
run_vcpu(vcpu);
@@ -618,28 +663,38 @@ static void test_vm_events_overflow(void *guest_code)
static void test_print_help(char *name)
{
- pr_info("Usage: %s [-h] [-d <test name>]\n", name);
- pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
- while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
switch (opt) {
- case 'd':
+ case 't':
if (!strncmp("basic", optarg, 5))
- disabled_tests |= SBI_PMU_TEST_BASIC;
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
- disabled_tests |= SBI_PMU_TEST_EVENTS;
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
else
goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
break;
case 'h':
default:
@@ -647,6 +702,15 @@ static bool parse_args(int argc, char *argv[])
}
}
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
return true;
done:
test_print_help(argv[0]);
@@ -655,25 +719,28 @@ done:
int main(int argc, char *argv[])
{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..f80ad6b47d16 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,26 +196,29 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
break;
+ case 'l':
+ latency = atoi_paranoid(optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -243,6 +246,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +295,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +313,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index 85cc8c18d6e7..e39a724fe860 100644
--- a/tools/testing/selftests/kvm/s390/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm)
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
- kvm_arch_vm_post_create(vm);
+ kvm_arch_vm_post_create(vm, 0);
}
static struct kvm_vm *create_vm_two_memslots(void)
diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
index 27255880dabd..aded795d42be 100644
--- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
@@ -291,7 +291,7 @@ int main(int argc, char *argv[])
ksft_test_result_pass("%s\n", testlist[idx].subfunc_name);
free(array);
} else {
- ksft_test_result_skip("%s feature is not avaialable\n",
+ ksft_test_result_skip("%s feature is not available\n",
testlist[idx].subfunc_name);
}
}
diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
index ad8095968601..751c61c0f056 100644
--- a/tools/testing/selftests/kvm/s390/debug_test.c
+++ b/tools/testing/selftests/kvm/s390/debug_test.c
@@ -17,7 +17,7 @@ asm("int_handler:\n"
"j .\n");
static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
- size_t new_psw_off, uint64_t *new_psw)
+ size_t new_psw_off, u64 *new_psw)
{
struct kvm_guest_debug debug = {};
struct kvm_regs regs;
@@ -27,7 +27,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
vm = vm_create_with_one_vcpu(vcpu, guest_code);
lowcore = addr_gpa2hva(vm, 0);
new_psw[0] = (*vcpu)->run->psw_mask;
- new_psw[1] = (uint64_t)int_handler;
+ new_psw[1] = (u64)int_handler;
memcpy(lowcore + new_psw_off, new_psw, 16);
vcpu_regs_get(*vcpu, &regs);
regs.gprs[2] = -1;
@@ -42,7 +42,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
static void test_step_int(void *guest_code, size_t new_psw_off)
{
struct kvm_vcpu *vcpu;
- uint64_t new_psw[2];
+ u64 new_psw[2];
struct kvm_vm *vm;
vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
@@ -79,7 +79,7 @@ static void test_step_pgm_diag(void)
.u.pgm.code = PGM_SPECIFICATION,
};
struct kvm_vcpu *vcpu;
- uint64_t new_psw[2];
+ u64 new_psw[2];
struct kvm_vm *vm;
vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c
new file mode 100644
index 000000000000..f3839284ac08
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/irq_routing.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IRQ routing offset tests.
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+extern char guest_code[];
+asm("guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test(void)
+{
+ struct kvm_irq_routing *routing;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ gpa_t mem;
+ int ret;
+
+ struct kvm_irq_routing_entry ue = {
+ .type = KVM_IRQ_ROUTING_S390_ADAPTER,
+ .gsi = 1,
+ };
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0);
+
+ routing = kvm_gsi_routing_create();
+ routing->nr = 1;
+ routing->entries[0] = ue;
+ routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem;
+ routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem;
+
+ routing->entries[0].u.adapter.summary_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n");
+
+ routing->entries[0].u.adapter.summary_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "summary offset inside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset = 4096 * 8;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n");
+
+ routing->entries[0].u.adapter.ind_offset -= 4;
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ ksft_test_result(ret == 0, "ind offset inside of page\n");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING));
+
+ ksft_print_header();
+ ksft_set_plan(4);
+ test();
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/keyop.c b/tools/testing/selftests/kvm/s390/keyop.c
new file mode 100644
index 000000000000..c7805e87d12c
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/keyop.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x KVM_S390_KEYOP
+ *
+ * Copyright IBM Corp. 2026
+ *
+ * Authors:
+ * Claudio Imbrenda <imbrenda@linux.ibm.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/bits.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "processor.h"
+
+#define BUF_PAGES 128UL
+#define GUEST_PAGES 256UL
+
+#define BUF_START_GFN (GUEST_PAGES - BUF_PAGES)
+#define BUF_START_ADDR (BUF_START_GFN << PAGE_SHIFT)
+
+#define KEY_BITS_ACC 0xf0
+#define KEY_BIT_F 0x08
+#define KEY_BIT_R 0x04
+#define KEY_BIT_C 0x02
+
+#define KEY_BITS_RC (KEY_BIT_R | KEY_BIT_C)
+#define KEY_BITS_ALL (KEY_BITS_ACC | KEY_BIT_F | KEY_BITS_RC)
+
+static unsigned char tmp[BUF_PAGES];
+static unsigned char old[BUF_PAGES];
+static unsigned char expected[BUF_PAGES];
+
+static int _get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+
+ return __vm_ioctl(vcpu->vm, KVM_S390_GET_SKEYS, &skeys_ioctl);
+}
+
+static void get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ int r = _get_skeys(vcpu, skeys);
+
+ TEST_ASSERT(!r, "Failed to get storage keys, r=%d", r);
+}
+
+static void set_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[])
+{
+ struct kvm_s390_skeys skeys_ioctl = {
+ .start_gfn = BUF_START_GFN,
+ .count = BUF_PAGES,
+ .skeydata_addr = (unsigned long)skeys,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_SET_SKEYS, &skeys_ioctl);
+ TEST_ASSERT(!r, "Failed to set storage keys, r=%d", r);
+}
+
+static int do_keyop(struct kvm_vcpu *vcpu, int op, unsigned long page_idx, unsigned char skey)
+{
+ struct kvm_s390_keyop keyop = {
+ .guest_addr = BUF_START_ADDR + page_idx * PAGE_SIZE,
+ .key = skey,
+ .operation = op,
+ };
+ int r;
+
+ r = __vm_ioctl(vcpu->vm, KVM_S390_KEYOP, &keyop);
+ TEST_ASSERT(!r, "Failed to perform keyop, r=%d", r);
+ TEST_ASSERT((keyop.key & 1) == 0,
+ "Last bit of key is 1, should be 0! page %lu, new key=%#x, old key=%#x",
+ page_idx, skey, keyop.key);
+
+ return keyop.key;
+}
+
+static void fault_in_buffer(struct kvm_vcpu *vcpu, int where, int cur_loc)
+{
+ unsigned long i;
+ int r;
+
+ if (where != cur_loc)
+ return;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ r = ioctl(vcpu->fd, KVM_S390_VCPU_FAULT, BUF_START_ADDR + i * PAGE_SIZE);
+ TEST_ASSERT(!r, "Faulting in buffer page %lu, r=%d", i, r);
+ }
+}
+
+static inline void set_pattern(unsigned char skeys[])
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++)
+ skeys[i] = i << 1;
+}
+
+static void dump_sk(const unsigned char skeys[], const char *descr)
+{
+ int i, j;
+
+ fprintf(stderr, "# %s:\n", descr);
+ for (i = 0; i < BUF_PAGES; i += 32) {
+ fprintf(stderr, "# %3d: ", i);
+ for (j = 0; j < 32; j++)
+ fprintf(stderr, "%02x ", skeys[i + j]);
+ fprintf(stderr, "\n");
+ }
+}
+
+static inline void compare(const unsigned char what[], const unsigned char expected[],
+ const char *descr, int fault_in_loc)
+{
+ int i;
+
+ for (i = 0; i < BUF_PAGES; i++) {
+ if (expected[i] != what[i]) {
+ dump_sk(expected, "Expected");
+ dump_sk(what, "Got");
+ }
+ TEST_ASSERT(expected[i] == what[i],
+ "%s! fault-in location %d, page %d, expected %#x, got %#x",
+ descr, fault_in_loc, i, expected[i], what[i]);
+ }
+}
+
+static inline void clear_all(void)
+{
+ memset(tmp, 0, BUF_PAGES);
+ memset(old, 0, BUF_PAGES);
+ memset(expected, 0, BUF_PAGES);
+}
+
+static void test_init(struct kvm_vcpu *vcpu, int fault_in)
+{
+ /* Set all storage keys to zero */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 2);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting keys not zero", fault_in);
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 3);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Setting storage keys failed", fault_in);
+}
+
+static void test_rrbe(struct kvm_vcpu *vcpu, int fault_in)
+{
+ unsigned char k;
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the RRBE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ k = do_keyop(vcpu, KVM_S390_KEYOP_RRBE, i, 0xff);
+ TEST_ASSERT((expected[i] & KEY_BITS_RC) == k,
+ "Old R or C value mismatch! expected: %#x, got %#x",
+ expected[i] & KEY_BITS_RC, k);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+
+ for (i = 0; i < BUF_PAGES; i++)
+ expected[i] &= ~KEY_BIT_R;
+
+ /* Verify that only the R bit has been cleared */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static void test_iske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(expected);
+ set_skeys(vcpu, expected);
+
+ /* Call the ISKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ tmp[i] = do_keyop(vcpu, KVM_S390_KEYOP_ISKE, i, 0xff);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(tmp, expected, "Old value mismatch", fault_in);
+
+ /* Check storage keys have not changed */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "Storage keys values changed", fault_in);
+}
+
+static void test_sske(struct kvm_vcpu *vcpu, int fault_in)
+{
+ int i;
+
+ /* Set storage keys to a sequential pattern */
+ fault_in_buffer(vcpu, fault_in, 1);
+ set_pattern(tmp);
+ set_skeys(vcpu, tmp);
+
+ /* Call the SSKE KEYOP ioctl on each page and verify the result */
+ fault_in_buffer(vcpu, fault_in, 2);
+ for (i = 0; i < BUF_PAGES; i++) {
+ expected[i] = ~tmp[i] & KEY_BITS_ALL;
+ /* Set the new storage keys to be the bit-inversion of the previous ones */
+ old[i] = do_keyop(vcpu, KVM_S390_KEYOP_SSKE, i, expected[i] | 1);
+ if (i == BUF_PAGES / 2)
+ fault_in_buffer(vcpu, fault_in, 3);
+ }
+ compare(old, tmp, "Old value mismatch", fault_in);
+
+ /* Verify that the storage keys have been set correctly */
+ fault_in_buffer(vcpu, fault_in, 4);
+ get_skeys(vcpu, tmp);
+ compare(tmp, expected, "New value mismatch", fault_in);
+}
+
+static struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu, int fault_in_location);
+ int n_fault_in_locations;
+} testplan[] = {
+ { "Initialization", test_init, 5 },
+ { "RRBE", test_rrbe, 5 },
+ { "ISKE", test_iske, 5 },
+ { "SSKE", test_sske, 5 },
+};
+
+static void run_test(void (*the_test)(struct kvm_vcpu *, int), int fault_in_location)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int r;
+
+ vm = vm_create_barebones();
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, GUEST_PAGES, 0);
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ r = _get_skeys(vcpu, tmp);
+ TEST_ASSERT(r == KVM_S390_GET_SKEYS_NONE,
+ "Storage keys are not disabled initially, r=%d", r);
+
+ clear_all();
+
+ the_test(vcpu, fault_in_location);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int i, f;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_KEYOP));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL));
+
+ ksft_print_header();
+ for (i = 0, f = 0; i < ARRAY_SIZE(testplan); i++)
+ f += testplan[i].n_fault_in_locations;
+ ksft_set_plan(f);
+
+ for (i = 0; i < ARRAY_SIZE(testplan); i++) {
+ for (f = 0; f < testplan[i].n_fault_in_locations; f++) {
+ run_test(testplan[i].test, f);
+ ksft_test_result_pass("%s (fault-in location %d)\n", testplan[i].name, f);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c
index 4374b4cd2a80..0244848621b3 100644
--- a/tools/testing/selftests/kvm/s390/memop.c
+++ b/tools/testing/selftests/kvm/s390/memop.c
@@ -34,7 +34,7 @@ enum mop_access_mode {
struct mop_desc {
uintptr_t gaddr;
uintptr_t gaddr_v;
- uint64_t set_flags;
+ u64 set_flags;
unsigned int f_check : 1;
unsigned int f_inject : 1;
unsigned int f_key : 1;
@@ -42,19 +42,19 @@ struct mop_desc {
unsigned int _set_flags : 1;
unsigned int _sida_offset : 1;
unsigned int _ar : 1;
- uint32_t size;
+ u32 size;
enum mop_target target;
enum mop_access_mode mode;
void *buf;
- uint32_t sida_offset;
+ u32 sida_offset;
void *old;
- uint8_t old_value[16];
+ u8 old_value[16];
bool *cmpxchg_success;
- uint8_t ar;
- uint8_t key;
+ u8 ar;
+ u8 key;
};
-const uint8_t NO_KEY = 0xff;
+const u8 NO_KEY = 0xff;
static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
{
@@ -85,7 +85,7 @@ static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
if (desc->mode == CMPXCHG) {
ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
- ksmo.old_addr = (uint64_t)desc->old;
+ ksmo.old_addr = (u64)desc->old;
memcpy(desc->old_value, desc->old, desc->size);
}
break;
@@ -230,8 +230,8 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-static uint8_t __aligned(PAGE_SIZE) mem1[65536];
-static uint8_t __aligned(PAGE_SIZE) mem2[65536];
+static u8 __aligned(PAGE_SIZE) mem1[65536];
+static u8 __aligned(PAGE_SIZE) mem2[65536];
struct test_default {
struct kvm_vm *kvm_vm;
@@ -296,7 +296,7 @@ static void prepare_mem12(void)
TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
- enum mop_target mop_target, uint32_t size, uint8_t key)
+ enum mop_target mop_target, u32 size, u8 key)
{
prepare_mem12();
CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
@@ -308,7 +308,7 @@ static void default_write_read(struct test_info copy_cpu, struct test_info mop_c
}
static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
- enum mop_target mop_target, uint32_t size, uint8_t key)
+ enum mop_target mop_target, u32 size, u8 key)
{
prepare_mem12();
CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
@@ -318,12 +318,12 @@ static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
ASSERT_MEM_EQ(mem1, mem2, size);
}
-static void default_cmpxchg(struct test_default *test, uint8_t key)
+static void default_cmpxchg(struct test_default *test, u8 key)
{
for (int size = 1; size <= 16; size *= 2) {
for (int offset = 0; offset < 16; offset += size) {
- uint8_t __aligned(16) new[16] = {};
- uint8_t __aligned(16) old[16];
+ u8 __aligned(16) new[16] = {};
+ u8 __aligned(16) old[16];
bool succ;
prepare_mem12();
@@ -400,7 +400,7 @@ static void test_copy_access_register(void)
kvm_vm_free(t.kvm_vm);
}
-static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+static void set_storage_key_range(void *addr, size_t len, u8 key)
{
uintptr_t _addr, abs, i;
int not_mapped = 0;
@@ -483,13 +483,13 @@ static __uint128_t cut_to_size(int size, __uint128_t val)
{
switch (size) {
case 1:
- return (uint8_t)val;
+ return (u8)val;
case 2:
- return (uint16_t)val;
+ return (u16)val;
case 4:
- return (uint32_t)val;
+ return (u32)val;
case 8:
- return (uint64_t)val;
+ return (u64)val;
case 16:
return val;
}
@@ -501,10 +501,10 @@ static bool popcount_eq(__uint128_t a, __uint128_t b)
{
unsigned int count_a, count_b;
- count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
- __builtin_popcountl((uint64_t)a);
- count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
- __builtin_popcountl((uint64_t)b);
+ count_a = __builtin_popcountl((u64)(a >> 64)) +
+ __builtin_popcountl((u64)a);
+ count_b = __builtin_popcountl((u64)(b >> 64)) +
+ __builtin_popcountl((u64)b);
return count_a == count_b;
}
@@ -553,7 +553,7 @@ static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
if (swap) {
int i, j;
__uint128_t new;
- uint8_t byte0, byte1;
+ u8 byte0, byte1;
rand = rand * 3 + 1;
i = rand % size;
@@ -585,28 +585,28 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t
switch (size) {
case 4: {
- uint32_t old = *old_addr;
+ u32 old = *old_addr;
asm volatile ("cs %[old],%[new],%[address]"
: [old] "+d" (old),
- [address] "+Q" (*(uint32_t *)(target))
- : [new] "d" ((uint32_t)new)
+ [address] "+Q" (*(u32 *)(target))
+ : [new] "d" ((u32)new)
: "cc"
);
- ret = old == (uint32_t)*old_addr;
+ ret = old == (u32)*old_addr;
*old_addr = old;
return ret;
}
case 8: {
- uint64_t old = *old_addr;
+ u64 old = *old_addr;
asm volatile ("csg %[old],%[new],%[address]"
: [old] "+d" (old),
- [address] "+Q" (*(uint64_t *)(target))
- : [new] "d" ((uint64_t)new)
+ [address] "+Q" (*(u64 *)(target))
+ : [new] "d" ((u64)new)
: "cc"
);
- ret = old == (uint64_t)*old_addr;
+ ret = old == (u64)*old_addr;
*old_addr = old;
return ret;
}
@@ -811,10 +811,10 @@ static void test_errors_cmpxchg_key(void)
static void test_termination(void)
{
struct test_default t = test_default_init(guest_error_key);
- uint64_t prefix;
- uint64_t teid;
- uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
- uint64_t psw[2];
+ u64 prefix;
+ u64 teid;
+ u64 teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+ u64 psw[2];
HOST_SYNC(t.vcpu, STAGE_INITED);
HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
@@ -855,7 +855,7 @@ static void test_errors_key_storage_prot_override(void)
kvm_vm_free(t.kvm_vm);
}
-const uint64_t last_page_addr = -PAGE_SIZE;
+const u64 last_page_addr = -PAGE_SIZE;
static void guest_copy_key_fetch_prot_override(void)
{
@@ -878,10 +878,10 @@ static void guest_copy_key_fetch_prot_override(void)
static void test_copy_key_fetch_prot_override(void)
{
struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
+ gva_t guest_0_page, guest_last_page;
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
if (guest_0_page != 0 || guest_last_page != last_page_addr) {
print_skip("did not allocate guest pages at required positions");
goto out;
@@ -917,10 +917,10 @@ out:
static void test_errors_key_fetch_prot_override_not_enabled(void)
{
struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
+ gva_t guest_0_page, guest_last_page;
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
if (guest_0_page != 0 || guest_last_page != last_page_addr) {
print_skip("did not allocate guest pages at required positions");
goto out;
@@ -938,10 +938,10 @@ out:
static void test_errors_key_fetch_prot_override_enabled(void)
{
struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
- vm_vaddr_t guest_0_page, guest_last_page;
+ gva_t guest_0_page, guest_last_page;
- guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
- guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
if (guest_0_page != 0 || guest_last_page != last_page_addr) {
print_skip("did not allocate guest pages at required positions");
goto out;
diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c
index b58f75b381e5..e3c7a2f148f9 100644
--- a/tools/testing/selftests/kvm/s390/resets.c
+++ b/tools/testing/selftests/kvm/s390/resets.c
@@ -20,7 +20,7 @@
struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-static uint8_t regs_null[512];
+static u8 regs_null[512];
static void guest_code_initial(void)
{
@@ -57,9 +57,9 @@ static void guest_code_initial(void)
);
}
-static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
+static void test_one_reg(struct kvm_vcpu *vcpu, u64 id, u64 value)
{
- uint64_t eval_reg;
+ u64 eval_reg;
eval_reg = vcpu_get_reg(vcpu, id);
TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
index bba0d9a6dcc8..478381e6f84e 100644
--- a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
+++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
@@ -4,16 +4,15 @@
*
* Copyright (C) 2024, Red Hat, Inc.
*/
-#include <sys/mman.h>
-
#include <linux/fs.h>
#include "test_util.h"
+#include "kvm_syscalls.h"
#include "kvm_util.h"
#include "kselftest.h"
#include "ucall_common.h"
-static void set_storage_key(void *addr, uint8_t skey)
+static void set_storage_key(void *addr, u8 skey)
{
asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
}
diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
index 12d5e1cb62e3..d86179827a18 100644
--- a/tools/testing/selftests/kvm/s390/tprot.c
+++ b/tools/testing/selftests/kvm/s390/tprot.c
@@ -4,8 +4,8 @@
*
* Copyright IBM Corp. 2021
*/
-#include <sys/mman.h>
#include "test_util.h"
+#include "kvm_syscalls.h"
#include "kvm_util.h"
#include "kselftest.h"
#include "ucall_common.h"
@@ -14,12 +14,12 @@
#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
-static uint8_t *const page_store_prot = pages[0];
-static uint8_t *const page_fetch_prot = pages[1];
+static __aligned(PAGE_SIZE) u8 pages[2][PAGE_SIZE];
+static u8 *const page_store_prot = pages[0];
+static u8 *const page_fetch_prot = pages[1];
/* Nonzero return value indicates that address not mapped */
-static int set_storage_key(void *addr, uint8_t key)
+static int set_storage_key(void *addr, u8 key)
{
int not_mapped = 0;
@@ -44,9 +44,9 @@ enum permission {
TRANSL_UNAVAIL = 3,
};
-static enum permission test_protection(void *addr, uint8_t key)
+static enum permission test_protection(void *addr, u8 key)
{
- uint64_t mask;
+ u64 mask;
asm volatile (
"tprot %[addr], 0(%[key])\n"
@@ -72,7 +72,7 @@ enum stage {
struct test {
enum stage stage;
void *addr;
- uint8_t key;
+ u8 key;
enum permission expected;
} tests[] = {
/*
@@ -146,7 +146,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0)
/*
* Some fetch protection override tests require that page 0
* be mapped, however, when the hosts tries to map that page via
- * vm_vaddr_alloc, it may happen that some other page gets mapped
+ * vm_alloc, it may happen that some other page gets mapped
* instead.
* In order to skip these tests we detect this inside the guest
*/
@@ -207,7 +207,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_run *run;
- vm_vaddr_t guest_0_page;
+ gva_t guest_0_page;
ksft_print_header();
ksft_set_plan(STAGE_END);
@@ -216,10 +216,10 @@ int main(int argc, char *argv[])
run = vcpu->run;
HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
- mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+ mprotect(addr_gva2hva(vm, (gva_t)pages), PAGE_SIZE * 2, PROT_READ);
HOST_SYNC(vcpu, TEST_SIMPLE);
- guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ guest_0_page = vm_alloc(vm, PAGE_SIZE, 0);
if (guest_0_page != 0) {
/* Use NO_TAP so we don't get a PASS print */
HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
@@ -229,7 +229,7 @@ int main(int argc, char *argv[])
HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
}
if (guest_0_page == 0)
- mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+ mprotect(addr_gva2hva(vm, (gva_t)0), PAGE_SIZE, PROT_READ);
run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
run->kvm_dirty_regs = KVM_SYNC_CRS;
HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index d265b34c54be..b8c6f37b53e0 100644
--- a/tools/testing/selftests/kvm/s390/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -111,7 +111,7 @@ FIXTURE(uc_kvm)
uintptr_t base_hva;
uintptr_t code_hva;
int kvm_run_size;
- vm_paddr_t pgd;
+ gpa_t pgd;
void *vm_mem;
int vcpu_fd;
int kvm_fd;
@@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm)
self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run))
TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size));
- self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
- ASSERT_NE(self->run, MAP_FAILED);
+ self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd);
/**
* For virtual cpus that have been created with S390 user controlled
* virtual machines, the resulting vcpu fd can be memory mapped at page
* offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of
* the virtual cpu's hardware control block.
*/
- self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
- ASSERT_NE(self->sie_block, MAP_FAILED);
+ self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, self->vcpu_fd,
+ KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT);
TH_LOG("VM created %p %p", self->run, self->sie_block);
@@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm)
FIXTURE_TEARDOWN(uc_kvm)
{
- munmap(self->sie_block, PAGE_SIZE);
- munmap(self->run, self->kvm_run_size);
+ kvm_munmap(self->sie_block, PAGE_SIZE);
+ kvm_munmap(self->run, self->kvm_run_size);
close(self->vcpu_fd);
close(self->vm_fd);
close(self->kvm_fd);
@@ -271,7 +269,7 @@ TEST(uc_cap_hpage)
}
/* calculate host virtual addr from guest physical addr */
-static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa)
+static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, gpa_t gpa)
{
return (void *)(self->base_hva - self->base_gpa + gpa);
}
@@ -573,7 +571,7 @@ TEST_F(uc_kvm, uc_skey)
{
struct kvm_s390_sie_block *sie_block = self->sie_block;
struct kvm_sync_regs *sync_regs = &self->run->s.regs;
- u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2);
+ u64 test_gva = VM_MEM_SIZE - (SZ_1M / 2);
struct kvm_run *run = self->run;
const u8 skeyvalue = 0x34;
@@ -585,7 +583,7 @@ TEST_F(uc_kvm, uc_skey)
/* set register content for test_skey_asm to access not mapped memory */
sync_regs->gprs[1] = skeyvalue;
sync_regs->gprs[5] = self->base_gpa;
- sync_regs->gprs[6] = test_vaddr;
+ sync_regs->gprs[6] = test_gva;
run->kvm_dirty_regs |= KVM_SYNC_GPRS;
/* DAT disabled + 64 bit mode */
diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c
new file mode 100644
index 000000000000..714906c1d12a
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390/user_operexec.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test operation exception forwarding.
+ *
+ * Copyright IBM Corp. 2025
+ *
+ * Authors:
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#include "kselftest.h"
+#include "kvm_util.h"
+#include "test_util.h"
+#include "sie.h"
+
+#include <linux/kvm.h>
+
+static void guest_code_instr0(void)
+{
+ asm(".word 0x0000");
+}
+
+static void test_user_instr0(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_user_operexec(void)
+{
+ asm(".word 0x0807");
+}
+
+static void test_user_operexec(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Since user_operexec is the superset it can be used for the
+ * 0 instruction.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0);
+
+ kvm_vm_free(vm);
+}
+
+/* combine user_instr0 and user_operexec */
+static void test_user_operexec_combined(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int rc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+
+ /* Reverse enablement order */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0);
+ TEST_ASSERT_EQ(0, rc);
+ rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0);
+ TEST_ASSERT_EQ(0, rc);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run all tests above.
+ *
+ * Enablement after VCPU has been added is automatically tested since
+ * we enable the capability after VCPU creation.
+ */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "instr0", test_user_instr0 },
+ { "operexec", test_user_operexec },
+ { "operexec_combined", test_user_operexec_combined},
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bc440d5aba57..e639a9db51ee 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -8,11 +8,11 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <linux/compiler.h>
#include <test_util.h>
+#include <kvm_syscalls.h>
#include <kvm_util.h>
#include <processor.h>
@@ -30,19 +30,19 @@
#define MEM_REGION_GPA 0xc0000000
#define MEM_REGION_SLOT 10
-static const uint64_t MMIO_VAL = 0xbeefull;
+static const u64 MMIO_VAL = 0xbeefull;
-extern const uint64_t final_rip_start;
-extern const uint64_t final_rip_end;
+extern const u64 final_rip_start;
+extern const u64 final_rip_end;
static sem_t vcpu_ready;
-static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+static inline u64 guest_spin_on_val(u64 spin_val)
{
- uint64_t val;
+ u64 val;
do {
- val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+ val = READ_ONCE(*((u64 *)MEM_REGION_GPA));
} while (val == spin_val);
GUEST_SYNC(0);
@@ -54,7 +54,7 @@ static void *vcpu_worker(void *data)
struct kvm_vcpu *vcpu = data;
struct kvm_run *run = vcpu->run;
struct ucall uc;
- uint64_t cmd;
+ u64 cmd;
/*
* Loop until the guest is done. Re-enter the guest on all MMIO exits,
@@ -111,8 +111,8 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
void *guest_code)
{
struct kvm_vm *vm;
- uint64_t *hva;
- uint64_t gpa;
+ u64 *hva;
+ gpa_t gpa;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
@@ -144,7 +144,7 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
static void guest_code_move_memory_region(void)
{
- uint64_t val;
+ u64 val;
GUEST_SYNC(0);
@@ -180,7 +180,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk)
pthread_t vcpu_thread;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint64_t *hva;
+ u64 *hva;
vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);
@@ -224,7 +224,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk)
static void guest_code_delete_memory_region(void)
{
struct desc_ptr idt;
- uint64_t val;
+ u64 val;
/*
* Clobber the IDT so that a #PF due to the memory region being deleted
@@ -345,12 +345,12 @@ static void test_zero_memory_regions(void)
static void test_invalid_memory_region_flags(void)
{
- uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES;
- const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD;
+ u32 supported_flags = KVM_MEM_LOG_DIRTY_PAGES;
+ const u32 v2_only_flags = KVM_MEM_GUEST_MEMFD;
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __riscv || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
supported_flags |= KVM_MEM_READONLY;
#endif
@@ -410,17 +410,10 @@ static void test_add_max_memory_regions(void)
{
int ret;
struct kvm_vm *vm;
- uint32_t max_mem_slots;
- uint32_t slot;
+ u32 max_mem_slots;
+ u32 slot;
void *mem, *mem_aligned, *mem_extra;
- size_t alignment;
-
-#ifdef __s390x__
- /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
- alignment = 0x100000;
-#else
- alignment = 1;
-#endif
+ size_t alignment = 1;
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
TEST_ASSERT(max_mem_slots > 0,
@@ -433,31 +426,30 @@ static void test_add_max_memory_regions(void)
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
- TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1);
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
for (slot = 0; slot < max_mem_slots; slot++)
vm_set_user_memory_region(vm, slot, 0,
- ((uint64_t)slot * MEM_REGION_SIZE),
+ ((u64)slot * MEM_REGION_SIZE),
MEM_REGION_SIZE,
- mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
+ mem_aligned + (u64)slot * MEM_REGION_SIZE);
/* Check it cannot be added memory slots beyond the limit */
- mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+ mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
- (uint64_t)max_mem_slots * MEM_REGION_SIZE,
+ (u64)max_mem_slots * MEM_REGION_SIZE,
MEM_REGION_SIZE, mem_extra);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
- munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ kvm_munmap(mem_extra, MEM_REGION_SIZE);
kvm_vm_free(vm);
}
@@ -564,7 +556,7 @@ static void guest_code_mmio_during_vectoring(void)
set_idt(&idt_desc);
/* Generate a #GP by dereferencing a non-canonical address */
- *((uint8_t *)NONCANONICAL) = 0x1;
+ *((u8 *)NONCANONICAL) = 0x1;
GUEST_ASSERT(0);
}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index cce2520af720..76fcdd1fd3cb 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -25,7 +25,7 @@
#define ST_GPA_BASE (1 << 30)
static void *st_gva[NR_VCPUS];
-static uint64_t guest_stolen_time[NR_VCPUS];
+static u64 guest_stolen_time[NR_VCPUS];
#if defined(__x86_64__)
@@ -42,9 +42,9 @@ static void check_status(struct kvm_steal_time *st)
static void guest_code(int cpu)
{
struct kvm_steal_time *st = st_gva[cpu];
- uint32_t version;
+ u32 version;
- GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+ GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((u64)st_gva[cpu] | KVM_MSR_ENABLED));
memset(st, 0, sizeof(*st));
GUEST_SYNC(0);
@@ -67,22 +67,16 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME);
}
-static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+static void steal_time_init(struct kvm_vcpu *vcpu, u32 i)
{
- int ret;
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
- (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
-
vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx)
{
struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
@@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
st->pad[8], st->pad[9], st->pad[10]);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__aarch64__)
/* PV_TIME_ST must have 64-byte alignment */
@@ -109,16 +118,16 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
#define PV_TIME_ST 0xc5000021
struct st_time {
- uint32_t rev;
- uint32_t attr;
- uint64_t st_time;
+ u32 rev;
+ u32 attr;
+ u64 st_time;
};
-static int64_t smccc(uint32_t func, uint64_t arg)
+static s64 smccc(u32 func, u64 arg)
{
struct arm_smccc_res res;
- smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+ do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@@ -131,7 +140,7 @@ static void check_status(struct st_time *st)
static void guest_code(int cpu)
{
struct st_time *st;
- int64_t status;
+ s64 status;
status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
GUEST_ASSERT_EQ(status, 0);
@@ -166,36 +175,26 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
}
-static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+static void steal_time_init(struct kvm_vcpu *vcpu, u32 i)
{
struct kvm_vm *vm = vcpu->vm;
- uint64_t st_ipa;
- int ret;
+ u64 st_ipa;
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
.attr = KVM_ARM_VCPU_PVTIME_IPA,
- .addr = (uint64_t)&st_ipa,
+ .addr = (u64)&st_ipa,
};
- vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
-
st_ipa = (ulong)st_gva[i];
vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
-
- ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx)
{
struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
@@ -205,22 +204,54 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
ksft_print_msg(" st_time: %ld\n", st->st_time);
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ u64 st_ipa;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (u64)&st_ipa,
+ };
+
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1);
+
+ st_ipa = (ulong)ST_GPA_BASE | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+ st_ipa = (ulong)ST_GPA_BASE;
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+ kvm_vm_free(vm);
+}
+
#elif defined(__riscv)
/* SBI STA shmem must have 64-byte alignment */
#define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63)
-static vm_paddr_t st_gpa[NR_VCPUS];
+static gpa_t st_gpa[NR_VCPUS];
struct sta_struct {
- uint32_t sequence;
- uint32_t flags;
- uint64_t steal;
- uint8_t preempted;
- uint8_t pad[47];
+ u32 sequence;
+ u32 flags;
+ u64 steal;
+ u8 preempted;
+ u8 pad[47];
} __packed;
-static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags)
+static void sta_set_shmem(gpa_t gpa, unsigned long flags)
{
unsigned long lo = (unsigned long)gpa;
#if __riscv_xlen == 32
@@ -243,7 +274,7 @@ static void check_status(struct sta_struct *st)
static void guest_code(int cpu)
{
struct sta_struct *st = st_gva[cpu];
- uint32_t sequence;
+ u32 sequence;
long out_val = 0;
bool probe;
@@ -268,7 +299,7 @@ static void guest_code(int cpu)
static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
- uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
+ u64 id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
unsigned long enabled = vcpu_get_reg(vcpu, id);
TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result");
@@ -276,16 +307,16 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
return enabled;
}
-static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+static void steal_time_init(struct kvm_vcpu *vcpu, u32 i)
{
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]);
+ st_gpa[i] = addr_gva2gpa(vcpu->vm, (gva_t)st_gva[i]);
sync_global_to_guest(vcpu->vm, st_gva[i]);
sync_global_to_guest(vcpu->vm, st_gpa[i]);
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx)
{
struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
int i;
@@ -301,6 +332,142 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
pr_info("\n");
}
+static void check_steal_time_uapi(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_one_reg reg;
+ u64 shmem;
+ int ret;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ reg.id = KVM_REG_RISCV |
+ KVM_REG_SIZE_ULONG |
+ KVM_REG_RISCV_SBI_STATE |
+ KVM_REG_RISCV_SBI_STA |
+ KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
+ reg.addr = (u64)&shmem;
+
+ shmem = ST_GPA_BASE + 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "misaligned STA shmem returns -EINVAL");
+
+ shmem = ST_GPA_BASE;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == 0,
+ "aligned STA shmem succeeds");
+
+ shmem = INVALID_GPA;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ TEST_ASSERT(ret == 0,
+ "all-ones for STA shmem succeeds");
+
+ kvm_vm_free(vm);
+}
+
+#elif defined(__loongarch__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63)
+#define KVM_STEAL_PHYS_VALID BIT_ULL(0)
+
+struct kvm_steal_time {
+ __u64 steal;
+ __u32 version;
+ __u32 flags;
+ __u8 preempted;
+ __u8 pad[47];
+};
+
+static void check_status(struct kvm_steal_time *st)
+{
+ GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
+}
+
+static void guest_code(int cpu)
+{
+ u32 version;
+ struct kvm_steal_time *st = st_gva[cpu];
+
+ memset(st, 0, sizeof(*st));
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ version = READ_ONCE(st->version);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(version < READ_ONCE(st->version));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
+{
+ int err;
+ u64 val;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_CPUCFG,
+ .attr = CPUCFG_KVM_FEATURE,
+ .addr = (u64)&val,
+ };
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ err = __vcpu_ioctl(vcpu, KVM_GET_DEVICE_ATTR, &attr);
+ if (err)
+ return false;
+
+ return val & BIT(KVM_FEATURE_STEAL_TIME);
+}
+
+static void steal_time_init(struct kvm_vcpu *vcpu, u32 i)
+{
+ int err;
+ u64 st_gpa;
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_device_attr attr = {
+ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
+ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA,
+ .addr = (u64)&st_gpa,
+ };
+
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
+
+ err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "No PV stealtime Feature");
+
+ st_gpa = (unsigned long)st_gva[i] | KVM_STEAL_PHYS_VALID;
+ err = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &attr);
+ TEST_ASSERT(err == 0, "Fail to set PV stealtime GPA");
+}
+
+static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx)
+{
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+}
+
+static void check_steal_time_uapi(void)
+{
+
+}
#endif
static void *do_steal_time(void *arg)
@@ -369,6 +536,8 @@ int main(int ac, char **av)
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
ksft_set_plan(NR_VCPUS);
+ check_steal_time_uapi();
+
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
steal_time_init(vcpus[i], i);
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
index 513d421a9bff..dc5e30b7b77f 100644
--- a/tools/testing/selftests/kvm/system_counter_offset_test.c
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -17,7 +17,7 @@
#ifdef __x86_64__
struct test_case {
- uint64_t tsc_offset;
+ u64 tsc_offset;
};
static struct test_case test_cases[] = {
@@ -39,12 +39,12 @@ static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test)
&test->tsc_offset);
}
-static uint64_t guest_read_system_counter(struct test_case *test)
+static u64 guest_read_system_counter(struct test_case *test)
{
return rdtsc();
}
-static uint64_t host_read_guest_system_counter(struct test_case *test)
+static u64 host_read_guest_system_counter(struct test_case *test)
{
return rdtsc() + test->tsc_offset;
}
@@ -69,9 +69,9 @@ static void guest_main(void)
}
}
-static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end)
+static void handle_sync(struct ucall *uc, u64 start, u64 end)
{
- uint64_t obs = uc->args[2];
+ u64 obs = uc->args[2];
TEST_ASSERT(start <= obs && obs <= end,
"unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
@@ -88,7 +88,7 @@ static void handle_abort(struct ucall *uc)
static void enter_guest(struct kvm_vcpu *vcpu)
{
- uint64_t start, end;
+ u64 start, end;
struct ucall uc;
int i;
diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..4e63da2b1889 100644
--- a/tools/testing/selftests/kvm/x86/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
@@ -69,15 +69,21 @@ static inline void __tileloadd(void *tile)
: : "a"(tile), "d"(0));
}
+static inline int tileloadd_safe(void *tile)
+{
+ return kvm_asm_safe(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10",
+ "a"(tile), "d"(0));
+}
+
static inline void __tilerelease(void)
{
asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
}
-static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+static inline void __xsavec(struct xstate *xstate, u64 rfbm)
{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
+ u32 rfbm_lo = rfbm;
+ u32 rfbm_hi = rfbm >> 32;
asm volatile("xsavec (%%rdi)"
: : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
@@ -124,27 +130,52 @@ static void set_tilecfg(struct tile_config *cfg)
}
}
+enum {
+ /* Retrieve TMM0 from guest, stash it for TEST_RESTORE_TILEDATA */
+ TEST_SAVE_TILEDATA = 1,
+
+ /* Check TMM0 against tiledata */
+ TEST_COMPARE_TILEDATA = 2,
+
+ /* Restore TMM0 from earlier save */
+ TEST_RESTORE_TILEDATA = 4,
+
+ /* Full VM save/restore */
+ TEST_SAVE_RESTORE = 8,
+};
+
static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
struct tile_data *tiledata,
struct xstate *xstate)
{
+ int vector;
+
GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) &&
this_cpu_has(X86_FEATURE_OSXSAVE));
check_xtile_info();
- GUEST_SYNC(1);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(2);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- GUEST_SYNC(3);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/* Check save/restore when trap to userspace */
__tileloadd(tiledata);
- GUEST_SYNC(4);
+ GUEST_SYNC(TEST_SAVE_TILEDATA | TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /* host tries setting tiledata while guest XFD is set */
+ GUEST_SYNC(TEST_RESTORE_TILEDATA);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ wrmsr(MSR_IA32_XFD, 0);
__tilerelease();
- GUEST_SYNC(5);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
/*
* After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
* the xcomp_bv.
@@ -154,6 +185,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+ /* #NM test */
+
/* xfd=0x40000, disable amx tiledata */
wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
@@ -166,32 +199,33 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
- GUEST_SYNC(6);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
set_tilecfg(amx_cfg);
__ldtilecfg(amx_cfg);
- /* Trigger #NM exception */
- __tileloadd(tiledata);
- GUEST_SYNC(10);
- GUEST_DONE();
-}
+ /* Trigger #NM exception */
+ vector = tileloadd_safe(tiledata);
+ __GUEST_ASSERT(vector == NM_VECTOR,
+ "Wanted #NM on tileloadd with XFD[18]=1, got %s",
+ ex_str(vector));
-void guest_nm_handler(struct ex_regs *regs)
-{
- /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
- GUEST_SYNC(7);
GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
- GUEST_SYNC(8);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
/* Clear xfd_err */
wrmsr(MSR_IA32_XFD_ERR, 0);
/* xfd=0, enable amx */
wrmsr(MSR_IA32_XFD, 0);
- GUEST_SYNC(9);
+ GUEST_SYNC(TEST_SAVE_RESTORE);
+
+ __tileloadd(tiledata);
+ GUEST_SYNC(TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE);
+
+ GUEST_DONE();
}
int main(int argc, char *argv[])
@@ -200,10 +234,10 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_x86_state *state;
+ struct kvm_x86_state *tile_state = NULL;
int xsave_restore_size;
- vm_vaddr_t amx_cfg, tiledata, xstate;
+ gva_t amx_cfg, tiledata, xstate;
struct ucall uc;
- u32 amx_offset;
int ret;
/*
@@ -228,22 +262,20 @@ int main(int argc, char *argv[])
vcpu_regs_get(vcpu, &regs1);
- /* Register #NM handler */
- vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
-
/* amx cfg for guest_code */
- amx_cfg = vm_vaddr_alloc_page(vm);
+ amx_cfg = vm_alloc_page(vm);
memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
/* amx tiledata for guest_code */
- tiledata = vm_vaddr_alloc_pages(vm, 2);
+ tiledata = vm_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
/* XSAVE state for guest_code */
- xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ xstate = vm_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+ int iter = 0;
for (;;) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
@@ -253,37 +285,47 @@ int main(int argc, char *argv[])
REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
- switch (uc.args[1]) {
- case 1:
- case 2:
- case 3:
- case 5:
- case 6:
- case 7:
- case 8:
- fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
- break;
- case 4:
- case 10:
- fprintf(stderr,
- "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+ ++iter;
+ if (uc.args[1] & TEST_SAVE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, save tiledata\n", iter);
+ tile_state = vcpu_save_state(vcpu);
+ }
+ if (uc.args[1] & TEST_COMPARE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, check TMM0 contents\n", iter);
/* Compacted mode, get amx offset by xsave area
* size subtract 8K amx size.
*/
- amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
- state = vcpu_save_state(vcpu);
- void *amx_start = (void *)state->xsave + amx_offset;
+ u32 amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ void *amx_start = (void *)tile_state->xsave + amx_offset;
void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
/* Only check TMM0 register, 1 tile */
ret = memcmp(amx_start, tiles_data, TILE_SIZE);
TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ }
+ if (uc.args[1] & TEST_RESTORE_TILEDATA) {
+ fprintf(stderr, "GUEST_SYNC #%d, before KVM_SET_XSAVE\n", iter);
+ vcpu_xsave_set(vcpu, tile_state->xsave);
+ fprintf(stderr, "GUEST_SYNC #%d, after KVM_SET_XSAVE\n", iter);
+ }
+ if (uc.args[1] & TEST_SAVE_RESTORE) {
+ fprintf(stderr, "GUEST_SYNC #%d, save/restore VM state\n", iter);
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
kvm_x86_state_cleanup(state);
- break;
- case 9:
- fprintf(stderr,
- "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
- break;
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
}
break;
case UCALL_DONE:
@@ -293,22 +335,6 @@ int main(int argc, char *argv[])
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
- state = vcpu_save_state(vcpu);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vcpu, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- vcpu = vm_recreate_with_one_vcpu(vm);
- vcpu_load_state(vcpu, state);
- kvm_x86_state_cleanup(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vcpu, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
}
done:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
new file mode 100644
index 000000000000..c91660103137
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for KVM_X86_DISABLE_EXITS_APERFMPERF
+ *
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test the ability to disable VM-exits for rdmsr of IA32_APERF and
+ * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs
+ * return the host's values.
+ *
+ * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <asm/msr-index.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define NUM_ITERATIONS 10000
+
+static int open_dev_msr(int cpu)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
+ return open_path_or_exit(path, O_RDONLY);
+}
+
+static u64 read_dev_msr(int msr_fd, u32 msr)
+{
+ u64 data;
+ ssize_t rc;
+
+ rc = pread(msr_fd, &data, sizeof(data), msr);
+ TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr);
+
+ return data;
+}
+
+static void guest_read_aperf_mperf(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_ITERATIONS; i++)
+ GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF));
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_read_aperf_mperf();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set
+ * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel.
+ */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *nested_test_data)
+{
+ guest_read_aperf_mperf();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(nested_test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(nested_test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+static void guest_no_aperfmperf(void)
+{
+ u64 msr_val;
+ u8 vector;
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ vector = rdmsr_safe(MSR_IA32_APERF, &msr_val);
+ GUEST_ASSERT(vector == GP_VECTOR);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ u64 host_aperf_before, host_mperf_before;
+ gva_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int msr_fd, cpu, i;
+
+ /* Sanity check that APERF/MPERF are unsupported by default. */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ kvm_vm_free(vm);
+
+ cpu = pin_self_to_any_cpu();
+
+ msr_fd = open_dev_msr(cpu);
+
+ /*
+ * This test requires a non-standard VM initialization, because
+ * KVM_ENABLE_CAP cannot be used on a VM file descriptor after
+ * a VCPU has been created.
+ */
+ vm = vm_create(1);
+
+ TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) &
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS,
+ KVM_X86_DISABLE_EXITS_APERFMPERF);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (!has_nested)
+ nested_test_data_gva = NONCANONICAL;
+ else if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) {
+ u64 host_aperf_after, host_mperf_after;
+ u64 guest_aperf, guest_mperf;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ guest_aperf = uc.args[0];
+ guest_mperf = uc.args[1];
+
+ host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF);
+ host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF);
+
+ TEST_ASSERT(host_aperf_before < guest_aperf,
+ "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_aperf_before, guest_aperf);
+ TEST_ASSERT(guest_aperf < host_aperf_after,
+ "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_aperf, host_aperf_after);
+ TEST_ASSERT(host_mperf_before < guest_mperf,
+ "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")",
+ host_mperf_before, guest_mperf);
+ TEST_ASSERT(guest_mperf < host_mperf_after,
+ "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")",
+ guest_mperf, host_mperf_after);
+
+ host_aperf_before = host_aperf_after;
+ host_mperf_before = host_mperf_after;
+
+ break;
+ }
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE\n");
+done:
+ kvm_vm_free(vm);
+ close(msr_fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
index f8916bb34405..0c84c27ea584 100644
--- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
@@ -19,8 +19,8 @@
* timer frequency.
*/
static const struct {
- const uint32_t tdcr;
- const uint32_t divide_count;
+ const u32 tdcr;
+ const u32 divide_count;
} tdcrs[] = {
{0x0, 2},
{0x1, 4},
@@ -42,12 +42,12 @@ static void apic_enable(void)
xapic_enable();
}
-static uint32_t apic_read_reg(unsigned int reg)
+static u32 apic_read_reg(unsigned int reg)
{
return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
}
-static void apic_write_reg(unsigned int reg, uint32_t val)
+static void apic_write_reg(unsigned int reg, u32 val)
{
if (is_x2apic)
x2apic_write_reg(reg, val);
@@ -55,12 +55,12 @@ static void apic_write_reg(unsigned int reg, uint32_t val)
xapic_write_reg(reg, val);
}
-static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+static void apic_guest_code(u64 apic_hz, u64 delay_ms)
{
- uint64_t tsc_hz = guest_tsc_khz * 1000;
- const uint32_t tmict = ~0u;
- uint64_t tsc0, tsc1, freq;
- uint32_t tmcct;
+ u64 tsc_hz = guest_tsc_khz * 1000;
+ const u32 tmict = ~0u;
+ u64 tsc0, tsc1, freq;
+ u32 tmcct;
int i;
apic_enable();
@@ -121,7 +121,7 @@ static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
}
}
-static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms,
bool x2apic)
{
struct kvm_vcpu *vcpu;
@@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
NSEC_PER_SEC / apic_hz);
+ TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1);
+ TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS),
+ NSEC_PER_SEC / apic_hz);
+
vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
@@ -168,8 +172,8 @@ int main(int argc, char *argv[])
* Arbitrarilty default to 25MHz for the APIC bus frequency, which is
* different enough from the default 1GHz to be interesting.
*/
- uint64_t apic_hz = 25 * 1000 * 1000;
- uint64_t delay_ms = 100;
+ u64 apic_hz = 25 * 1000 * 1000;
+ u64 delay_ms = 100;
int opt;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..ef0ddd240887 100644
--- a/tools/testing/selftests/kvm/x86/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
@@ -140,10 +140,10 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
}
}
-struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, gva_t *p_gva, struct kvm_cpuid2 *cpuid)
{
int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+ gva_t gva = vm_alloc(vm, size, KVM_UTIL_MIN_VADDR);
struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
memcpy(guest_cpuids, cpuid, size);
@@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct
static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *ent;
+ struct kvm_sregs sregs;
int rc;
u32 eax, ebx, x;
@@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
rc = __vcpu_set_cpuid(vcpu);
TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+ /*
+ * Toggle CR4 bits that affect dynamic CPUID feature flags to verify
+ * setting unmodified CPUID succeeds with runtime CPUID updates.
+ */
+ vcpu_sregs_get(vcpu, &sregs);
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ sregs.cr4 ^= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ sregs.cr4 ^= X86_CR4_PKE;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
/* Changing CPU features is forbidden */
ent = vcpu_get_cpuid_entry(vcpu, 0x7);
ebx = ent->ebx;
@@ -202,7 +217,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu)
int main(void)
{
struct kvm_vcpu *vcpu;
- vm_vaddr_t cpuid_gva;
+ gva_t cpuid_gva;
struct kvm_vm *vm;
int stage;
diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 2d814c1d1dc4..0dfaf03cd0a0 100644
--- a/tools/testing/selftests/kvm/x86/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
@@ -16,7 +16,7 @@
#define IRQ_VECTOR 0xAA
/* For testing data access debug BP */
-uint32_t guest_value;
+u32 guest_value;
extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
@@ -86,7 +86,7 @@ int main(void)
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
- uint64_t cmd;
+ u64 cmd;
int i;
/* Instruction lengths starting at ss_start */
int ss_size[6] = {
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..388ba4101f97 100644
--- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -23,7 +23,7 @@
#define SLOTS 2
#define ITERATIONS 2
-static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
@@ -33,17 +33,17 @@ static int iteration;
static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
struct kvm_page_stats {
- uint64_t pages_4k;
- uint64_t pages_2m;
- uint64_t pages_1g;
- uint64_t hugepages;
+ u64 pages_4k;
+ u64 pages_2m;
+ u64 pages_1g;
+ u64 hugepages;
};
static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
stats->hugepages = stats->pages_2m + stats->pages_1g;
pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
@@ -89,9 +89,9 @@ static void run_test(enum vm_guest_mode mode, void *unused)
{
struct kvm_vm *vm;
unsigned long **bitmaps;
- uint64_t guest_num_pages;
- uint64_t host_num_pages;
- uint64_t pages_per_slot;
+ u64 guest_num_pages;
+ u64 host_num_pages;
+ u64 pages_per_slot;
int i;
struct kvm_page_stats stats_populated;
struct kvm_page_stats stats_dirty_logging_enabled;
diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
new file mode 100644
index 000000000000..5b3aef109cfc
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2026, Red Hat, Inc.
+ *
+ * Test that vmx_leave_smm() validates vmcs12 controls before re-entering
+ * nested guest mode on RSM.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "smm.h"
+#include "hyperv.h"
+#include "vmx.h"
+
+#define SMRAM_GPA 0x1000000
+#define SMRAM_STAGE 0xfe
+
+#define SYNC_PORT 0xe
+
+#define STR(x) #x
+#define XSTR(s) STR(s)
+
+/*
+ * SMI handler: runs in real-address mode.
+ * Reports SMRAM_STAGE via port IO, then does RSM.
+ */
+static u8 smi_handler[] = {
+ 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
+ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
+ 0x0f, 0xaa, /* rsm */
+};
+
+static inline void sync_with_host(u64 phase)
+{
+ asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n"
+ : "+a" (phase));
+}
+
+static void l2_guest_code(void)
+{
+ sync_with_host(1);
+
+ /* After SMI+RSM with invalid controls, we should not reach here. */
+ vmcall();
+}
+
+static void guest_code(struct vmx_pages *vmx_pages,
+ struct hyperv_test_pages *hv_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Set up Hyper-V enlightenments and eVMCS */
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 exits via vmcall if test fails */
+ sync_with_host(2);
+}
+
+int main(int argc, char *argv[])
+{
+ gva_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ struct hyperv_test_pages *hv;
+ struct hv_enlightened_vmcs *evmcs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_regs regs;
+ int stage_reported;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
+
+ vcpu_run(vcpu);
+
+ /* L2 is running and syncs with host. */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == 1,
+ "Expected stage 1, got %d", stage_reported);
+
+ /* Inject SMI while L2 is running. */
+ inject_smi(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ vcpu_regs_get(vcpu, &regs);
+ stage_reported = regs.rax & 0xff;
+ TEST_ASSERT(stage_reported == SMRAM_STAGE,
+ "Expected SMM handler stage %#x, got %#x",
+ SMRAM_STAGE, stage_reported);
+
+ /*
+ * Guest is now paused in the SMI handler, about to execute RSM.
+ * Hack the eVMCS page to set-up invalid pin-based execution
+ * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING).
+ */
+ evmcs = hv->enlightened_vmcs_hva;
+ evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS;
+ evmcs->hv_clean_fields = 0;
+
+ /*
+ * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE,
+ * copying the invalid pin_based_vm_exec_control into cached_vmcs12.
+ */
+ union {
+ struct kvm_nested_state state;
+ char state_[16384];
+ } nested_state_buf;
+
+ memset(&nested_state_buf, 0, sizeof(nested_state_buf));
+ nested_state_buf.state.size = sizeof(nested_state_buf);
+ vcpu_nested_state_get(vcpu, &nested_state_buf.state);
+
+ /*
+ * Resume the guest. The SMI handler executes RSM, which calls
+ * vmx_leave_smm(). nested_vmx_check_controls() should detect
+ * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault.
+ */
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..c0d30ccd8767
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define fastop(__insn) \
+ "bt $0, %[bt_val]\n\t" \
+ __insn "\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t"
+
+#define flags_constraint(flags_val) [flags]"=r"(flags_val)
+#define bt_constraint(__bt_val) [bt_val]"rm"((u32)__bt_val)
+
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[val]") \
+ : [val]"+r"(__val), flags_constraint(__flags) \
+ : bt_constraint(__val) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ u64 flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (u64)ex_val, insn, (u64)input, (u64)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (u64)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : [input]"r"(__input), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ u64 flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (u64)ex_output, insn, (u64)input, \
+ (u64)input2, (u64)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (u64)input, (u64)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \
+ : [output]"+r"(__output), flags_constraint(__flags) \
+ : "c"(__shift), bt_constraint(__output) \
+ : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ u8 shift = __val1; \
+ u64 flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (u64)ex_output, insn, shift, (u64)input, \
+ (u64)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (u64)input, flags); \
+})
+
+#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \
+({ \
+ u64 ign_error_code; \
+ u8 vector; \
+ \
+ __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \
+ : "+a"(__a), "+d"(__d), flags_constraint(__flags), \
+ KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : [denom]"rm"(__rm), bt_constraint(__rm) \
+ : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define guest_test_fastop_div(insn, type_t, __val1, __val2) \
+({ \
+ type_t _a = __val1, _d = __val1, rm = __val2; \
+ type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \
+ u64 flags, ex_flags; \
+ u8 v, ex_v; \
+ \
+ ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \
+ v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \
+ \
+ GUEST_ASSERT_EQ(v, ex_v); \
+ __GUEST_ASSERT(v == ex_v, \
+ "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \
+ ex_v, insn, (u64)_a, (u64)_d, (u64)rm, v); \
+ __GUEST_ASSERT(a == ex_a && d == ex_d, \
+ "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\
+ (u64)ex_a, (u64)ex_d, insn, (u64)_a, \
+ (u64)_d, (u64)rm, (u64)a, (u64)d); \
+ __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \
+ "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \
+ ex_flags, insn, (u64)_a, (u64)_d, (u64)rm, flags);\
+})
+
+static const u64 vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+if (sizeof(type_t) != 1) { \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+} \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(u8, "b");
+ guest_test_fastops(u16, "w");
+ guest_test_fastops(u32, "l");
+ guest_test_fastops(u64, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
index a72f13ae2edb..158550701771 100644
--- a/tools/testing/selftests/kvm/x86/feature_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
@@ -12,7 +12,7 @@
#include "kvm_util.h"
#include "processor.h"
-static bool is_kvm_controlled_msr(uint32_t msr)
+static bool is_kvm_controlled_msr(u32 msr)
{
return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1;
}
@@ -21,7 +21,7 @@ static bool is_kvm_controlled_msr(uint32_t msr)
* For VMX MSRs with a "true" variant, KVM requires userspace to set the "true"
* MSR, and doesn't allow setting the hidden version.
*/
-static bool is_hidden_vmx_msr(uint32_t msr)
+static bool is_hidden_vmx_msr(u32 msr)
{
switch (msr) {
case MSR_IA32_VMX_PINBASED_CTLS:
@@ -34,15 +34,15 @@ static bool is_hidden_vmx_msr(uint32_t msr)
}
}
-static bool is_quirked_msr(uint32_t msr)
+static bool is_quirked_msr(u32 msr)
{
return msr != MSR_AMD64_DE_CFG;
}
-static void test_feature_msr(uint32_t msr)
+static void test_feature_msr(u32 msr)
{
- const uint64_t supported_mask = kvm_get_feature_msr(msr);
- uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0;
+ const u64 supported_mask = kvm_get_feature_msr(msr);
+ u64 reset_value = is_quirked_msr(msr) ? supported_mask : 0;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
index 762628f7d4ba..753a0e730ea8 100644
--- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
@@ -26,18 +26,18 @@ static void guest_ud_handler(struct ex_regs *regs)
regs->rip += HYPERCALL_INSN_SIZE;
}
-static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
-static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+static const u8 vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const u8 svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
-extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t do_sched_yield(uint8_t apic_id)
+extern u8 hypercall_insn[HYPERCALL_INSN_SIZE];
+static u64 do_sched_yield(u8 apic_id)
{
- uint64_t ret;
+ u64 ret;
asm volatile("hypercall_insn:\n\t"
".byte 0xcc,0xcc,0xcc\n\t"
: "=a"(ret)
- : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "a"((u64)KVM_HC_SCHED_YIELD), "b"((u64)apic_id)
: "memory");
return ret;
@@ -45,14 +45,14 @@ static uint64_t do_sched_yield(uint8_t apic_id)
static void guest_main(void)
{
- const uint8_t *native_hypercall_insn;
- const uint8_t *other_hypercall_insn;
- uint64_t ret;
+ const u8 *native_hypercall_insn;
+ const u8 *other_hypercall_insn;
+ u64 ret;
if (host_cpu_is_intel) {
native_hypercall_insn = vmx_vmcall;
other_hypercall_insn = svm_vmmcall;
- } else if (host_cpu_is_amd) {
+ } else if (host_cpu_is_amd_compatible) {
native_hypercall_insn = svm_vmmcall;
other_hypercall_insn = vmx_vmcall;
} else {
@@ -72,7 +72,7 @@ static void guest_main(void)
* the "right" hypercall.
*/
if (quirk_disabled) {
- GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(ret == (u64)-EFAULT);
GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
HYPERCALL_INSN_SIZE));
} else {
diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
index 37b1a9f52864..fd6b6c67199a 100644
--- a/tools/testing/selftests/kvm/x86/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86/flds_emulation.h
@@ -12,7 +12,7 @@
* KVM to emulate the instruction (e.g. by providing an MMIO address) to
* exercise emulation failures.
*/
-static inline void flds(uint64_t address)
+static inline void flds(u64 address)
{
__asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
}
@@ -21,8 +21,8 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
struct kvm_regs regs;
- uint8_t *insn_bytes;
- uint64_t flags;
+ u8 *insn_bytes;
+ u64 flags;
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
index 10b1b0ba374e..8e20a03b3329 100644
--- a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
@@ -10,11 +10,11 @@
void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
{
- const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
- const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
- const uint64_t legal = ignored | valid;
- uint64_t val = BIT_ULL(bit);
- uint64_t actual;
+ const u64 ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const u64 valid = BIT_ULL(18) | BIT_ULL(24);
+ const u64 legal = ignored | valid;
+ u64 val = BIT_ULL(bit);
+ u64 actual;
int r;
r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
index e058bc676cd6..c083cea546dc 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c
@@ -98,7 +98,7 @@ static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
}
-static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, gpa_t tsc_page_gpa)
{
u64 tsc_scale, tsc_offset;
@@ -208,7 +208,7 @@ int main(void)
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- vm_vaddr_t tsc_page_gva;
+ gva_t tsc_page_gva;
int stage;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
@@ -218,7 +218,7 @@ int main(void)
vcpu_set_hv_cpuid(vcpu);
- tsc_page_gva = vm_vaddr_alloc_page(vm);
+ tsc_page_gva = vm_alloc_page(vm);
memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned");
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..3c21af811d8f 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
{
}
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -64,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
- "function %x is our of supported range",
+ "function %x is out of supported range",
entry->function);
TEST_ASSERT(entry->index == 0,
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
case 0x40000004:
test_val = entry->eax & (1UL << 18);
- TEST_ASSERT(!!test_val == !smt_possible(),
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
index 74cf19661309..c7fa114aee20 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
@@ -30,7 +30,7 @@ static void guest_nmi_handler(struct ex_regs *regs)
{
}
-static inline void rdmsr_from_l2(uint32_t msr)
+static inline void rdmsr_from_l2(u32 msr)
{
/* Currently, L1 doesn't preserve GPRs during vmexits. */
__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
@@ -76,7 +76,7 @@ void l2_guest_code(void)
}
void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
- vm_vaddr_t hv_hcall_page_gpa)
+ gpa_t hv_hcall_page_gpa)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -231,8 +231,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
- vm_vaddr_t hcall_page;
+ gva_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ gva_t hcall_page;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -246,7 +246,7 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ hcall_page = vm_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
vcpu_set_hv_cpuid(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
index 949e08e98f31..ae047db7b1be 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
@@ -15,19 +15,19 @@
/* Any value is fine */
#define EXT_CAPABILITIES 0xbull
-static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
- vm_vaddr_t out_pg_gva)
+static void guest_code(gpa_t in_pg_gpa, gpa_t out_pg_gpa,
+ gva_t out_pg_gva)
{
- uint64_t *output_gva;
+ u64 *output_gva;
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
- output_gva = (uint64_t *)out_pg_gva;
+ output_gva = (u64 *)out_pg_gva;
hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
- /* TLFS states output will be a uint64_t value */
+ /* TLFS states output will be a u64 value */
GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
GUEST_DONE();
@@ -35,12 +35,12 @@ static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
int main(void)
{
- vm_vaddr_t hcall_out_page;
- vm_vaddr_t hcall_in_page;
+ gva_t hcall_out_page;
+ gva_t hcall_in_page;
struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
- uint64_t *outval;
+ u64 *outval;
struct ucall uc;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
@@ -57,11 +57,11 @@ int main(void)
vcpu_set_hv_cpuid(vcpu);
/* Hypercall input */
- hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ hcall_in_page = vm_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
/* Hypercall output */
- hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ hcall_out_page = vm_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..7347f1fe5157 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
@@ -22,27 +22,27 @@
KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
struct msr_data {
- uint32_t idx;
+ u32 idx;
bool fault_expected;
bool write;
u64 write_val;
};
struct hcall_data {
- uint64_t control;
- uint64_t expect;
+ u64 control;
+ u64 expect;
bool ud_expected;
};
-static bool is_write_only_msr(uint32_t msr)
+static bool is_write_only_msr(u32 msr)
{
return msr == HV_X64_MSR_EOI;
}
static void guest_msr(struct msr_data *msr)
{
- uint8_t vector = 0;
- uint64_t msr_val = 0;
+ u8 vector = 0;
+ u64 msr_val = 0;
GUEST_ASSERT(msr->idx);
@@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr)
if (msr->fault_expected)
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected #GP on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
else
__GUEST_ASSERT(!vector,
- "Expected success on %sMSR(0x%x), got vector '0x%x'",
- msr->write ? "WR" : "RD", msr->idx, vector);
+ "Expected success on %sMSR(0x%x), got %s",
+ msr->write ? "WR" : "RD", msr->idx, ex_str(vector));
if (vector || is_write_only_msr(msr->idx))
goto done;
@@ -82,10 +82,10 @@ done:
GUEST_DONE();
}
-static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+static void guest_hcall(gpa_t pgs_gpa, struct hcall_data *hcall)
{
u64 res, input, output;
- uint8_t vector;
+ u8 vector;
GUEST_ASSERT_NE(hcall->control, 0);
@@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
input = pgs_gpa;
- output = pgs_gpa + 4096;
+ output = pgs_gpa + PAGE_SIZE;
} else {
input = output = 0;
}
@@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
vector = __hyperv_hypercall(hcall->control, input, output, &res);
if (hcall->ud_expected) {
__GUEST_ASSERT(vector == UD_VECTOR,
- "Expected #UD for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected #UD for control '%lu', got %s",
+ hcall->control, ex_str(vector));
} else {
__GUEST_ASSERT(!vector,
- "Expected no exception for control '%lu', got vector '0x%x'",
- hcall->control, vector);
+ "Expected no exception for control '%lu', got %s",
+ hcall->control, ex_str(vector));
GUEST_ASSERT_EQ(res, hcall->expect);
}
@@ -134,14 +134,14 @@ static void guest_test_msrs_access(void)
struct kvm_vm *vm;
struct ucall uc;
int stage = 0;
- vm_vaddr_t msr_gva;
+ gva_t msr_gva;
struct msr_data *msr;
bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
while (true) {
vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
- msr_gva = vm_vaddr_alloc_page(vm);
+ msr_gva = vm_alloc_page(vm);
memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
msr = addr_gva2hva(vm, msr_gva);
@@ -523,17 +523,17 @@ static void guest_test_hcalls_access(void)
struct kvm_vm *vm;
struct ucall uc;
int stage = 0;
- vm_vaddr_t hcall_page, hcall_params;
+ gva_t hcall_page, hcall_params;
struct hcall_data *hcall;
while (true) {
vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
/* Hypercall input/output */
- hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ hcall_page = vm_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
- hcall_params = vm_vaddr_alloc_page(vm);
+ hcall_params = vm_alloc_page(vm);
memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
hcall = addr_gva2hva(vm, hcall_params);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..771535f9aad3 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -18,7 +18,7 @@
#define IPI_VECTOR 0xfe
-static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+static volatile u64 ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
struct hv_vpset {
u64 format;
@@ -45,13 +45,13 @@ struct hv_send_ipi_ex {
struct hv_vpset vp_set;
};
-static inline void hv_init(vm_vaddr_t pgs_gpa)
+static inline void hv_init(gpa_t pgs_gpa)
{
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
}
-static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+static void receiver_code(void *hcall_page, gpa_t pgs_gpa)
{
u32 vcpu_id;
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* Signal sender vCPU we're ready */
ipis_rcvd[vcpu_id] = (u64)-1;
- for (;;)
- asm volatile("sti; hlt; cli");
+ for (;;) {
+ safe_halt();
+ cli();
+ }
}
static void guest_ipi_handler(struct ex_regs *regs)
@@ -83,7 +85,7 @@ static inline void nop_loop(void)
asm volatile("nop");
}
-static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+static void sender_guest_code(void *hcall_page, gpa_t pgs_gpa)
{
struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
@@ -100,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
ipi->vector = IPI_VECTOR;
ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
- hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -114,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 0;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
@@ -136,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -158,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
- pgs_gpa, pgs_gpa + 4096);
+ pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -181,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
GUEST_SYNC(stage++);
/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
- memset(hcall_page, 0, 4096);
+ memset(hcall_page, 0, PAGE_SIZE);
ipi_ex->vector = IPI_VECTOR;
ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
- hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE);
nop_loop();
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
@@ -241,7 +243,7 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu[3];
- vm_vaddr_t hcall_page;
+ gva_t hcall_page;
pthread_t threads[2];
int stage = 1, r;
struct ucall uc;
@@ -251,7 +253,7 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
/* Hypercall input/output */
- hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ hcall_page = vm_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
index 0ddb63229bcb..7a62f6a9d606 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
@@ -21,7 +21,7 @@
#define L2_GUEST_STACK_SIZE 256
/* Exit to L1 from L2 with RDMSR instruction */
-static inline void rdmsr_from_l2(uint32_t msr)
+static inline void rdmsr_from_l2(u32 msr)
{
/* Currently, L1 doesn't preserve GPRs during vmexits. */
__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
@@ -67,7 +67,7 @@ void l2_guest_code(void)
static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
struct hyperv_test_pages *hv_pages,
- vm_vaddr_t pgs_gpa)
+ gpa_t pgs_gpa)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
struct vmcb *vmcb = svm->vmcb;
@@ -149,8 +149,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
int main(int argc, char *argv[])
{
- vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
- vm_vaddr_t hcall_page;
+ gva_t nested_gva = 0, hv_pages_gva = 0;
+ gva_t hcall_page;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
@@ -165,7 +165,7 @@ int main(int argc, char *argv[])
vcpu_alloc_svm(vm, &nested_gva);
vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
- hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ hcall_page = vm_alloc_pages(vm, 1);
memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..15ee8b7bfc11 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
@@ -61,14 +61,14 @@ struct hv_tlb_flush_ex {
* - GVAs of the test pages' PTEs
*/
struct test_data {
- vm_vaddr_t hcall_gva;
- vm_paddr_t hcall_gpa;
- vm_vaddr_t test_pages;
- vm_vaddr_t test_pages_pte[NTEST_PAGES];
+ gva_t hcall_gva;
+ gpa_t hcall_gpa;
+ gva_t test_pages;
+ gva_t test_pages_pte[NTEST_PAGES];
};
/* 'Worker' vCPU code checking the contents of the test page */
-static void worker_guest_code(vm_vaddr_t test_data)
+static void worker_guest_code(gva_t test_data)
{
struct test_data *data = (struct test_data *)test_data;
u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
@@ -133,12 +133,12 @@ static void set_expected_val(void *addr, u64 val, int vcpu_id)
* Update PTEs swapping two test pages.
* TODO: use swap()/xchg() when these are provided.
*/
-static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+static void swap_two_test_pages(gpa_t pte_gva1, gpa_t pte_gva2)
{
- uint64_t tmp = *(uint64_t *)pte_gva1;
+ u64 tmp = *(u64 *)pte_gva1;
- *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
- *(uint64_t *)pte_gva2 = tmp;
+ *(u64 *)pte_gva1 = *(u64 *)pte_gva2;
+ *(u64 *)pte_gva2 = tmp;
}
/*
@@ -196,12 +196,12 @@ static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
#define TESTVAL2 0x0202020202020202
/* Main vCPU doing the test */
-static void sender_guest_code(vm_vaddr_t test_data)
+static void sender_guest_code(gva_t test_data)
{
struct test_data *data = (struct test_data *)test_data;
struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
- vm_paddr_t hcall_gpa = data->hcall_gpa;
+ gpa_t hcall_gpa = data->hcall_gpa;
int i, stage = 1;
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
@@ -581,9 +581,9 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct kvm_vcpu *vcpu[3];
pthread_t threads[2];
- vm_vaddr_t test_data_page, gva;
- vm_paddr_t gpa;
- uint64_t *pte;
+ gva_t test_data_page, gva;
+ gpa_t gpa;
+ u64 *pte;
struct test_data *data;
struct ucall uc;
int stage = 1, r, i;
@@ -593,11 +593,11 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
/* Test data page */
- test_data_page = vm_vaddr_alloc_page(vm);
+ test_data_page = vm_alloc_page(vm);
data = (struct test_data *)addr_gva2hva(vm, test_data_page);
/* Hypercall input/output */
- data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+ data->hcall_gva = vm_alloc_pages(vm, 2);
data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
@@ -606,7 +606,7 @@ int main(int argc, char *argv[])
* and the test will swap their mappings. The third page keeps the indication
* about the current state of mappings.
*/
- data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+ data->test_pages = vm_alloc_pages(vm, NTEST_PAGES + 1);
for (i = 0; i < NTEST_PAGES; i++)
memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
(u8)(i + 1), PAGE_SIZE);
@@ -617,11 +617,11 @@ int main(int argc, char *argv[])
* Get PTE pointers for test pages and map them inside the guest.
* Use separate page for each PTE for simplicity.
*/
- gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+ gva = vm_unused_gva_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
for (i = 0; i < NTEST_PAGES; i++) {
- pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE);
gpa = addr_hva2gpa(vm, pte);
- __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK);
data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
}
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..52014a3210c8
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ gva_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
index 5bc12222d87a..5ad4aeb8e373 100644
--- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
@@ -17,8 +17,8 @@
#include "processor.h"
struct test_case {
- uint64_t kvmclock_base;
- int64_t realtime_offset;
+ u64 kvmclock_base;
+ s64 realtime_offset;
};
static struct test_case test_cases[] = {
@@ -31,7 +31,7 @@ static struct test_case test_cases[] = {
#define GUEST_SYNC_CLOCK(__stage, __val) \
GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
-static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+static void guest_main(gpa_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
{
int i;
@@ -52,7 +52,7 @@ static inline void assert_flags(struct kvm_clock_data *data)
static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
struct kvm_clock_data *end)
{
- uint64_t obs, exp_lo, exp_hi;
+ u64 obs, exp_lo, exp_hi;
obs = uc->args[2];
exp_lo = start->clock;
@@ -135,8 +135,8 @@ static void enter_guest(struct kvm_vcpu *vcpu)
int main(void)
{
struct kvm_vcpu *vcpu;
- vm_vaddr_t pvti_gva;
- vm_paddr_t pvti_gpa;
+ gva_t pvti_gva;
+ gpa_t pvti_gpa;
struct kvm_vm *vm;
int flags;
@@ -147,7 +147,7 @@ int main(void)
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+ pvti_gva = vm_alloc(vm, getpagesize(), 0x10000);
pvti_gpa = addr_gva2gpa(vm, pvti_gva);
vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 1b805cbdb47b..8ed5fa635021 100644
--- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -13,7 +13,7 @@
#include "processor.h"
struct msr_data {
- uint32_t idx;
+ u32 idx;
const char *name;
};
@@ -40,8 +40,8 @@ static struct msr_data msrs_to_test[] = {
static void test_msr(struct msr_data *msr)
{
- uint64_t ignored;
- uint8_t vector;
+ u64 ignored;
+ u8 vector;
PR_MSR(msr);
@@ -53,7 +53,7 @@ static void test_msr(struct msr_data *msr)
}
struct hcall_data {
- uint64_t nr;
+ u64 nr;
const char *name;
};
@@ -73,7 +73,7 @@ static struct hcall_data hcalls_to_test[] = {
static void test_hcall(struct hcall_data *hc)
{
- uint64_t r;
+ u64 r;
PR_HCALL(hc);
r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..9c156cf7db0e 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -27,19 +30,27 @@ do { \
\
if (fault_wanted) \
__GUEST_ASSERT((vector) == UD_VECTOR, \
- "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected #UD on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
else \
__GUEST_ASSERT(!(vector), \
- "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
- testcase, vector); \
+ "Expected success on " insn " for testcase '0x%x', got %s", \
+ testcase, ex_str(vector)); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,80 +61,76 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
int main(int argc, char *argv[])
{
- uint64_t disabled_quirks;
+ u64 disabled_quirks;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c
new file mode 100644
index 000000000000..f7e39bf887ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/msrs_test.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <asm/msr-index.h>
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */
+#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR
+
+struct kvm_msr {
+ const struct kvm_x86_cpu_feature feature;
+ const struct kvm_x86_cpu_feature feature2;
+ const char *name;
+ const u64 reset_val;
+ const u64 write_val;
+ const u64 rsvd_val;
+ const u32 index;
+ const bool is_kvm_defined;
+};
+
+#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \
+{ \
+ .index = msr, \
+ .name = str, \
+ .write_val = val, \
+ .rsvd_val = rsvd, \
+ .reset_val = reset, \
+ .feature = X86_FEATURE_ ##feat, \
+ .feature2 = X86_FEATURE_ ##f2, \
+ .is_kvm_defined = is_kvm, \
+}
+
+#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \
+ ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false)
+
+#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, reset, feat)
+
+#define MSR_TEST(msr, val, rsvd, feat) \
+ __MSR_TEST(msr, #msr, val, rsvd, 0, feat)
+
+#define MSR_TEST2(msr, val, rsvd, feat, f2) \
+ ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false)
+
+/*
+ * Note, use a page aligned value for the canonical value so that the value
+ * is compatible with MSRs that use bits 11:0 for things other than addresses.
+ */
+static const u64 canonical_val = 0x123456789000ull;
+
+/*
+ * Arbitrary value with bits set in every byte, but not all bits set. This is
+ * also a non-canonical value, but that's coincidental (any 64-bit value with
+ * an alternating 0s/1s pattern will be non-canonical).
+ */
+static const u64 u64_val = 0xaaaa5555aaaa5555ull;
+
+#define MSR_TEST_CANONICAL(msr, feat) \
+ __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat)
+
+#define MSR_TEST_KVM(msr, val, rsvd, feat) \
+ ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true)
+
+/*
+ * The main struct must be scoped to a function due to the use of structures to
+ * define features. For the global structure, allocate enough space for the
+ * foreseeable future without getting too ridiculous, to minimize maintenance
+ * costs (bumping the array size every time an MSR is added is really annoying).
+ */
+static struct kvm_msr msrs[128];
+static int idx;
+
+static bool ignore_unsupported_msrs;
+
+static u64 fixup_rdmsr_val(u32 msr, u64 want)
+{
+ /*
+ * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM
+ * is supposed to emulate that behavior based on guest vendor model
+ * (which is the same as the host vendor model for this test).
+ */
+ if (!host_cpu_is_amd_compatible)
+ return want;
+
+ switch (msr) {
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_TSC_AUX:
+ return want & GENMASK_ULL(31, 0);
+ default:
+ return want;
+ }
+}
+
+static void __rdmsr(u32 msr, u64 want)
+{
+ u64 val;
+ u8 vec;
+
+ vec = rdmsr_safe(msr, &val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr);
+
+ __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx",
+ want, msr, val);
+}
+
+static void __wrmsr(u32 msr, u64 val)
+{
+ u8 vec;
+
+ vec = wrmsr_safe(msr, val);
+ __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)",
+ ex_str(vec), msr, val);
+ __rdmsr(msr, fixup_rdmsr_val(msr, val));
+}
+
+static void guest_test_supported_msr(const struct kvm_msr *msr)
+{
+ __rdmsr(msr->index, msr->reset_val);
+ __wrmsr(msr->index, msr->write_val);
+ GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val));
+
+ __rdmsr(msr->index, msr->reset_val);
+}
+
+static void guest_test_unsupported_msr(const struct kvm_msr *msr)
+{
+ u64 val;
+ u8 vec;
+
+ /*
+ * KVM's ABI with respect to ignore_msrs is a mess and largely beyond
+ * repair, just skip the unsupported MSR tests.
+ */
+ if (ignore_unsupported_msrs)
+ goto skip_wrmsr_gp;
+
+ /*
+ * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are
+ * writable only if their associated feature is supported. Skip the
+ * RDMSR #GP test if the secondary feature is supported, but perform
+ * the WRMSR #GP test as the to-be-written value is tied to the primary
+ * feature. For all other MSRs, simply do nothing.
+ */
+ if (this_cpu_has(msr->feature2)) {
+ if (msr->index != MSR_IA32_U_CET &&
+ msr->index != MSR_IA32_S_CET)
+ goto skip_wrmsr_gp;
+
+ goto skip_rdmsr_gp;
+ }
+
+ vec = rdmsr_safe(msr->index, &val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s",
+ msr->index, ex_str(vec));
+
+skip_rdmsr_gp:
+ vec = wrmsr_safe(msr->index, msr->write_val);
+ __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->write_val, ex_str(vec));
+
+skip_wrmsr_gp:
+ GUEST_SYNC(0);
+}
+
+void guest_test_reserved_val(const struct kvm_msr *msr)
+{
+ /* Skip reserved value checks as well, ignore_msrs is trully a mess. */
+ if (ignore_unsupported_msrs)
+ return;
+
+ /*
+ * If the CPU will truncate the written value (e.g. SYSENTER on AMD),
+ * expect success and a truncated value, not #GP.
+ */
+ if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) ||
+ msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) {
+ u8 vec = wrmsr_safe(msr->index, msr->rsvd_val);
+
+ __GUEST_ASSERT(vec == GP_VECTOR,
+ "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s",
+ msr->index, msr->rsvd_val, ex_str(vec));
+ } else {
+ __wrmsr(msr->index, msr->rsvd_val);
+ __wrmsr(msr->index, msr->reset_val);
+ }
+}
+
+static void guest_main(void)
+{
+ for (;;) {
+ const struct kvm_msr *msr = &msrs[READ_ONCE(idx)];
+
+ if (this_cpu_has(msr->feature))
+ guest_test_supported_msr(msr);
+ else
+ guest_test_unsupported_msr(msr);
+
+ if (msr->rsvd_val)
+ guest_test_reserved_val(msr);
+
+ GUEST_SYNC(msr->reset_val);
+ }
+}
+
+static bool has_one_reg;
+static bool use_one_reg;
+
+#define KVM_X86_MAX_NR_REGS 1
+
+static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct {
+ struct kvm_reg_list list;
+ u64 regs[KVM_X86_MAX_NR_REGS];
+ } regs = {};
+ int r, i;
+
+ /*
+ * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported
+ * regs, then the vCPU obviously doesn't support the reg.
+ */
+ r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ if (!r)
+ return false;
+
+ TEST_ASSERT_EQ(errno, E2BIG);
+
+ /*
+ * KVM x86 is expected to support enumerating a relative small number
+ * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG
+ * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For
+ * simplicity, hardcode the maximum number of regs and manually update
+ * the test as necessary.
+ */
+ TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS,
+ "KVM reports %llu regs, test expects at most %u regs, stale test?",
+ regs.list.n, KVM_X86_MAX_NR_REGS);
+
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &regs.list);
+ for (i = 0; i < regs.list.n; i++) {
+ if (regs.regs[i] == reg)
+ return true;
+ }
+
+ return false;
+}
+
+static void host_test_kvm_reg(struct kvm_vcpu *vcpu)
+{
+ bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature);
+ u64 reset_val = msrs[idx].reset_val;
+ u64 write_val = msrs[idx].write_val;
+ u64 rsvd_val = msrs[idx].rsvd_val;
+ u32 reg = msrs[idx].index;
+ u64 val;
+ int r;
+
+ if (!use_one_reg)
+ return;
+
+ TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg);
+
+ if (!has_reg) {
+ r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val);
+ TEST_ASSERT(r && errno == EINVAL,
+ "Expected failure on get_reg(0x%x)", reg);
+ rsvd_val = 0;
+ goto out;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, reg, val);
+
+ vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val);
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg));
+ TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ write_val, reg, val);
+
+out:
+ r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val);
+ TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val);
+}
+
+static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val)
+{
+ u64 reset_val = msrs[idx].reset_val;
+ u32 msr = msrs[idx].index;
+ u64 val;
+
+ if (!kvm_cpu_has(msrs[idx].feature))
+ return;
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ guest_val, msr, val);
+
+ if (use_one_reg)
+ vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val);
+ else
+ vcpu_set_msr(vcpu, msr, reset_val);
+
+ val = vcpu_get_msr(vcpu, msr);
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx",
+ reset_val, msr, val);
+
+ if (!has_one_reg)
+ return;
+
+ val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr));
+ TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx",
+ reset_val, msr, val);
+}
+
+static void do_vcpu_run(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ host_test_msr(vcpu, uc.args[1]);
+ return;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_DONE:
+ TEST_FAIL("Unexpected UCALL_DONE");
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS)
+{
+ int i;
+
+ for (i = 0; i < NR_VCPUS; i++)
+ do_vcpu_run(vcpus[i]);
+}
+
+#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)
+
+static void test_msrs(void)
+{
+ const struct kvm_msr __msrs[] = {
+ MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE,
+ MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING,
+ MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE),
+ MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE),
+
+ /*
+ * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add
+ * entries for each features so that TSC_AUX doesn't exists for
+ * the "unsupported" vCPU, and obviously to test both cases.
+ */
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID),
+ MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP),
+
+ MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE),
+ /*
+ * SYSENTER_{ESP,EIP} are technically non-canonical on Intel,
+ * but KVM doesn't emulate that behavior on emulated writes,
+ * i.e. this test will observe different behavior if the MSR
+ * writes are handed by hardware vs. KVM. KVM's behavior is
+ * intended (though far from ideal), so don't bother testing
+ * non-canonical values.
+ */
+ MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE),
+ MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE),
+
+ MSR_TEST_CANONICAL(MSR_FS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM),
+ MSR_TEST_CANONICAL(MSR_LSTAR, LM),
+ MSR_TEST_CANONICAL(MSR_CSTAR, LM),
+ MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM),
+
+ MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT),
+ MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK),
+ MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK),
+ MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK),
+
+ MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK),
+ };
+
+ const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE;
+ const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM;
+
+ /*
+ * Create three vCPUs, but run them on the same task, to validate KVM's
+ * context switching of MSR state. Don't pin the task to a pCPU to
+ * also validate KVM's handling of cross-pCPU migration. Use the full
+ * set of features for the first two vCPUs, but clear all features in
+ * third vCPU in order to test both positive and negative paths.
+ */
+ const int NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct kvm_vm *vm;
+ int i;
+
+ kvm_static_assert(sizeof(__msrs) <= sizeof(msrs));
+ kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs));
+ memcpy(msrs, __msrs, sizeof(__msrs));
+
+ ignore_unsupported_msrs = kvm_is_ignore_msrs();
+
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus);
+
+ sync_global_to_guest(vm, msrs);
+ sync_global_to_guest(vm, ignore_unsupported_msrs);
+
+ /*
+ * Clear features in the "unsupported features" vCPU. This needs to be
+ * done before the first vCPU run as KVM's ABI is that guest CPUID is
+ * immutable once the vCPU has been run.
+ */
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ /*
+ * Don't clear LM; selftests are 64-bit only, and KVM doesn't
+ * honor LM=0 for MSRs that are supposed to exist if and only
+ * if the vCPU is a 64-bit model. Ditto for NONE; clearing a
+ * fake feature flag will result in false failures.
+ */
+ if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) &&
+ memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none)))
+ vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature);
+ }
+
+ for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) {
+ struct kvm_msr *msr = &msrs[idx];
+
+ if (msr->is_kvm_defined) {
+ for (i = 0; i < NR_VCPUS; i++)
+ host_test_kvm_reg(vcpus[i]);
+ continue;
+ }
+
+ /*
+ * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST
+ * are consistent with respect to MSRs whose existence is
+ * enumerated via CPUID. Skip the check for FS/GS.base MSRs,
+ * as they aren't reported in the save/restore list since their
+ * state is managed via SREGS.
+ */
+ TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE ||
+ kvm_msr_is_in_save_restore_list(msr->index) ==
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)),
+ "%s %s in save/restore list, but %s according to CPUID", msr->name,
+ kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't",
+ (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ?
+ "supported" : "unsupported");
+
+ sync_global_to_guest(vm, idx);
+
+ vcpus_run(vcpus, NR_VCPUS);
+ vcpus_run(vcpus, NR_VCPUS);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG);
+
+ test_msrs();
+
+ if (has_one_reg) {
+ use_one_reg = true;
+ test_msrs();
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
index dad988351493..761fec293408 100644
--- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_close_while_nested
- *
* Copyright (C) 2019, Red Hat, Inc.
*
* Verify that nothing bad happens if a KVM user exits with open
@@ -12,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -22,6 +21,8 @@ enum {
PORT_L0_EXIT = 0x2000,
};
+#define L2_GUEST_STACK_SIZE 64
+
static void l2_guest_code(void)
{
/* Exit to L0 */
@@ -29,9 +30,8 @@ static void l2_guest_code(void)
: : [port] "d" (PORT_L0_EXIT) : "rax");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(0);
}
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Prepare the VMCB for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ gva_t guest_gva;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
for (;;) {
volatile struct kvm_run *run = vcpu->run;
diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
new file mode 100644
index 000000000000..0e67cce83570
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+
+/*
+ * Allocate four pages total. Two pages are used to verify that the KVM marks
+ * the accessed page/GFN as marked dirty, but not the "other" page. Times two
+ * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA
+ * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to
+ * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page
+ * tables, as marking L2's GPA dirty would get a false pass if L1 == L2).
+ */
+#define TEST_MEM_PAGES 4
+
+#define TEST_MEM_BASE 0xc0000000
+#define TEST_MEM_ALIAS_BASE 0xc0002000
+
+#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE)
+
+#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx)
+
+#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx)
+
+#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx))
+
+#define L2_GUEST_STACK_SIZE 64
+
+/* Use the page offset bits to communicate the access+fault type. */
+#define TEST_SYNC_READ_FAULT BIT(0)
+#define TEST_SYNC_WRITE_FAULT BIT(1)
+#define TEST_SYNC_NO_FAULT BIT(2)
+
+static void l2_guest_code(gva_t base)
+{
+ gva_t page0 = TEST_GUEST_ADDR(base, 0);
+ gva_t page1 = TEST_GUEST_ADDR(base, 1);
+
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT);
+ WRITE_ONCE(*(u64 *)page0, 1);
+ GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page0);
+ GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT);
+
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ WRITE_ONCE(*(u64 *)page1, 1);
+ GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT);
+ READ_ONCE(*(u64 *)page1);
+ GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT);
+
+ /* Exit to L1 and never come back. */
+ vmcall();
+}
+
+static void l2_guest_code_tdp_enabled(void)
+{
+ /*
+ * Use the aliased virtual addresses when running with TDP to verify
+ * that KVM correctly handles the case where a page is dirtied via a
+ * different GPA than would be used by L1.
+ */
+ l2_guest_code(TEST_MEM_ALIAS_BASE);
+}
+
+static void l2_guest_code_tdp_disabled(void)
+{
+ /*
+ * Use the "normal" virtual addresses when running without TDP enabled,
+ * in which case L2 will use the same page tables as L1, and thus needs
+ * to use the same virtual addresses that are mapped into L1.
+ */
+ l2_guest_code(TEST_MEM_BASE);
+}
+
+void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ if (vmx->eptp_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ void *l2_rip;
+
+ if (svm->ncr3_gpa)
+ l2_rip = l2_guest_code_tdp_enabled;
+ else
+ l2_rip = l2_guest_code_tdp_disabled;
+
+ generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_SYNC(TEST_SYNC_NO_FAULT);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg,
+ unsigned long *bmap)
+{
+ gva_t gva = arg & ~(PAGE_SIZE - 1);
+ int page_nr, i;
+
+ /*
+ * Extract the page number of underlying physical page, which is also
+ * the _L1_ page number. The dirty bitmap _must_ be updated based on
+ * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA
+ * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty
+ * bitmap and which underlying physical page is accessed.
+ *
+ * Note, gva will be '0' if there was no access, i.e. if the purpose of
+ * the sync is to verify all pages are clean.
+ */
+ if (!gva)
+ page_nr = 0;
+ else if (gva >= TEST_MEM_ALIAS_BASE)
+ page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT;
+ else
+ page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT;
+ TEST_ASSERT(page_nr == 0 || page_nr == 1,
+ "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg);
+ TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT),
+ "Test bug, gva must be valid if a fault is expected");
+
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+ /*
+ * Check all pages to verify the correct physical page was modified (or
+ * not), and that all pages are clean/dirty as expected.
+ *
+ * If a fault of any kind is expected, the target page should be dirty
+ * as the Dirty bit is set in the gPTE. KVM should create a writable
+ * SPTE even on a read fault, *and* KVM must mark the GFN as dirty
+ * when doing so.
+ */
+ for (i = 0; i < TEST_MEM_PAGES; i++) {
+ if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT))
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1,
+ "Page %u incorrectly not written by guest", i);
+ else
+ TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL,
+ "Page %u incorrectly written by guest", i);
+
+ if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT))
+ TEST_ASSERT(test_bit(i, bmap),
+ "Page %u incorrectly reported clean on %s fault",
+ i, arg & TEST_SYNC_READ_FAULT ? "read" : "write");
+ else
+ TEST_ASSERT(!test_bit(i, bmap),
+ "Page %u incorrectly reported dirty", i);
+ }
+}
+
+static void test_dirty_log(bool nested_tdp)
+{
+ gva_t nested_gva = 0;
+ unsigned long *bmap;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool done = false;
+
+ pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled");
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (nested_tdp)
+ vm_enable_tdp(vm);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
+
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ /*
+ * Add an identity map for GVA range [0xc0000000, 0xc0004000). This
+ * affects both L1 and L2. However...
+ */
+ virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES);
+
+ /*
+ * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will
+ * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2).
+ *
+ * When TDP is disabled, the L2 guest code will still access the same L1
+ * GPAs as the TDP enabled case.
+ *
+ * Set the Dirty bit in the PTEs used by L2 so that KVM will create
+ * writable SPTEs when handling read faults (if the Dirty bit isn't
+ * set, KVM must intercept the next write to emulate the Dirty bit
+ * update).
+ */
+ if (nested_tdp) {
+ tdp_identity_map_default_memslots(vm);
+ tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE);
+ tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE);
+
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu);
+ } else {
+ *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu);
+ *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu);
+ }
+
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
+
+ while (!done) {
+ memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ test_handle_ucall_sync(vm, uc.args[1], bmap);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM));
+
+ test_dirty_log(/*nested_tdp=*/false);
+
+ if (kvm_cpu_has_tdp())
+ test_dirty_log(/*nested_tdp=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..fb7dcbe53ac7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ u8 opcode[15];
+ u32 exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static u8 kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static u8 l2_guest_code[sizeof(kvm_fep) + 15];
+static u8 *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static u32 get_instruction_length(struct emulated_instruction *insn)
+{
+ u32 i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ u32 insn_len = get_instruction_length(insn);
+ u32 exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ gva_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3641a42934ac..186e980aa8ee 100644
--- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -72,7 +72,7 @@ static void l2_ss_injected_tf_test(void)
}
static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
- uint32_t error_code)
+ u32 error_code)
{
struct vmcb *vmcb = svm->vmcb;
struct vmcb_control_area *ctrl = &vmcb->control;
@@ -111,7 +111,7 @@ static void l1_svm_code(struct svm_test_data *svm)
GUEST_DONE();
}
-static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+static void vmx_run_l2(void *l2_code, int vector, u32 error_code)
{
GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
@@ -216,7 +216,7 @@ static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
*/
int main(int argc, char *argv[])
{
- vm_vaddr_t nested_test_data_gva;
+ gva_t nested_test_data_gva;
struct kvm_vcpu_events events;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
new file mode 100644
index 000000000000..11fd2467d823
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * This test verifies that L1 fails to enter L2 with an invalid CR3, and
+ * succeeds otherwise.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ vmcall();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = svm->vmcb->save.cr3;
+ svm->vmcb->save.cr3 = -1ull;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ svm->vmcb->save.cr3 = save_cr3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Try to run L2 with invalid CR3 and make sure it fails */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+
+ /* Now restore CR3 and make sure L2 runs successfully */
+ vmwrite(GUEST_CR3, save_cr3);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ gva_t guest_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
index 67a62a5a8895..831380732671 100644
--- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_set_nested_state_test
- *
* Copyright (C) 2019, Google LLC.
*
* This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE.
@@ -11,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <errno.h>
#include <linux/kvm.h>
@@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu)
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
- TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
- TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
+
+ TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull);
+ TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull);
+ TEST_ASSERT_EQ(state->flags, 0);
+
+ free(state);
+}
+
+static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu)
+{
+ u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME);
+}
+
+static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu)
+{
+ u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER);
+
+ vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME);
+}
+
+void set_default_svm_state(struct kvm_nested_state *state, int size)
+{
+ memset(state, 0, size);
+ state->format = 1;
+ state->size = size;
+ state->hdr.svm.vmcb_pa = 0x3000;
+}
+
+void test_svm_nested_state(struct kvm_vcpu *vcpu)
+{
+ /* Add a page for VMCB. */
+ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
+ struct kvm_nested_state *state =
+ (struct kvm_nested_state *)malloc(state_sz);
+
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM);
+
+ /* The format must be set to 1. 0 for VMX, 1 for SVM. */
+ set_default_svm_state(state, state_sz);
+ state->format = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or
+ * cleared.
+ */
+ vcpu_efer_disable_svm(vcpu);
+
+ set_default_svm_state(state, state_sz);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+
+ /* Enable SVM in the guest EFER. */
+ vcpu_efer_enable_svm(vcpu);
+
+ /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and VMCB
+ * only when entering guest mode.
+ */
+ set_default_svm_state(state, state_sz/2);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
+ state->flags = KVM_STATE_NESTED_GUEST_MODE;
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Test that if we leave nesting the state reflects that when we get it
+ * again, except for vmcb_pa, which is always returned as 0 when not in
+ * guest mode.
+ */
+ set_default_svm_state(state, state_sz);
+ state->hdr.svm.vmcb_pa = -1ull;
+ state->flags = KVM_STATE_NESTED_GIF_SET;
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
+ TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
+ "Size must be between %ld and %d. The size returned was %d.",
+ sizeof(*state), state_sz, state->size);
+
+ TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0);
+ TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET);
free(state);
}
@@ -255,20 +354,20 @@ int main(int argc, char *argv[])
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
- /*
- * AMD currently does not implement set_nested_state, so for now we
- * just early out.
- */
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
vm = vm_create_with_one_vcpu(&vcpu, NULL);
/*
- * First run tests with VMX disabled to check error handling.
+ * First run tests with VMX/SVM disabled to check error handling.
+ * test_{vmx/svm}_nested_state() will re-enable as needed.
*/
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
+ else
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
test_nested_state_expect_efault(vcpu, NULL);
@@ -297,7 +396,10 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vcpu);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ test_vmx_nested_state(vcpu);
+ else
+ test_svm_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
index 2ceb5c78c442..f0e4adac4751 100644
--- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vmx_tsc_adjust_test
- *
* Copyright (C) 2018, Google LLC.
*
* IA32_TSC_ADJUST test
@@ -22,6 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#include <string.h>
#include <sys/ioctl.h>
@@ -35,6 +34,8 @@
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
+#define L2_GUEST_STACK_SIZE 64
+
enum {
PORT_ABORT = 0x1000,
PORT_REPORT,
@@ -52,9 +53,9 @@ enum {
/* The virtual machine object. */
static struct kvm_vm *vm;
-static void check_ia32_tsc_adjust(int64_t max)
+static void check_ia32_tsc_adjust(s64 max)
{
- int64_t adjust;
+ s64 adjust;
adjust = rdmsr(MSR_IA32_TSC_ADJUST);
GUEST_SYNC(adjust);
@@ -63,7 +64,7 @@ static void check_ia32_tsc_adjust(int64_t max)
static void l2_guest_code(void)
{
- uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+ u64 l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
@@ -72,46 +73,51 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_guest_code(void *data)
{
-#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
- uintptr_t save_cr3;
+ /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */
GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_ASSERT(load_vmcs(vmx_pages));
-
- /* Prepare the VMCS for L2 execution. */
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
- control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
- control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
- vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
- vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
-
- /* Jump into L2. First, test failure to load guest CR3. */
- save_cr3 = vmreadz(GUEST_CR3);
- vmwrite(GUEST_CR3, -1ull);
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
- (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
- check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
- vmwrite(GUEST_CR3, save_cr3);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ /*
+ * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not
+ * intercepting the write so it should update L1's TSC_ADJUST.
+ */
+ if (this_cpu_has(X86_FEATURE_VMX)) {
+ struct vmx_pages *vmx_pages = data;
+ u32 control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ } else {
+ struct svm_test_data *svm = data;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ }
check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
-
GUEST_DONE();
}
-static void report(int64_t val)
+static void report(s64 val)
{
pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
@@ -119,16 +125,19 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ gva_t nested_gva;
struct kvm_vcpu *vcpu;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &nested_gva);
+ else
+ vcpu_alloc_svm(vm, &nested_gva);
- /* Allocate VMX pages and shared descriptors (vmx_pages). */
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (;;) {
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
index 1759fa5cb3f2..190e93af20a1 100644
--- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c
@@ -13,12 +13,13 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#include "kselftest.h"
/* L2 is scaled up (from L1's perspective) by this factor */
#define L2_SCALE_FACTOR 4ULL
-#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_OFFSET_L2 ((u64)-33125236320908)
#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
#define L2_GUEST_STACK_SIZE 64
@@ -34,9 +35,9 @@ enum { USLEEP, UCHECK_L1, UCHECK_L2 };
* measurements, a difference of 1% between the actual and the expected value
* is tolerated.
*/
-static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+static void compare_tsc_freq(u64 actual, u64 expected)
{
- uint64_t tolerance, thresh_low, thresh_high;
+ u64 tolerance, thresh_low, thresh_high;
tolerance = expected / 100;
thresh_low = expected - tolerance;
@@ -54,7 +55,7 @@ static void compare_tsc_freq(uint64_t actual, uint64_t expected)
static void check_tsc_freq(int level)
{
- uint64_t tsc_start, tsc_end, tsc_freq;
+ u64 tsc_start, tsc_end, tsc_freq;
/*
* Reading the TSC twice with about a second's difference should give
@@ -79,10 +80,33 @@ static void l2_guest_code(void)
__asm__ __volatile__("vmcall");
}
-static void l1_guest_code(struct vmx_pages *vmx_pages)
+static void l1_svm_code(struct svm_test_data *svm)
{
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC scaling for L2 */
+ wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
+
+ /* launch L2 */
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u32 control;
/* check that L1's frequency looks alright before launching L2 */
check_tsc_freq(UCHECK_L1);
@@ -116,20 +140,29 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_DONE();
}
+static void l1_guest_code(void *data)
+{
+ if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(data);
+ else
+ l1_svm_code(data);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t vmx_pages_gva;
+ gva_t guest_gva = 0;
- uint64_t tsc_start, tsc_end;
- uint64_t tsc_khz;
- uint64_t l1_scale_factor;
- uint64_t l0_tsc_freq = 0;
- uint64_t l1_tsc_freq = 0;
- uint64_t l2_tsc_freq = 0;
+ u64 tsc_start, tsc_end;
+ u64 tsc_khz;
+ u64 l1_scale_factor;
+ u64 l0_tsc_freq = 0;
+ u64 l1_tsc_freq = 0;
+ u64 l2_tsc_freq = 0;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
+ kvm_cpu_has(X86_FEATURE_SVM));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
@@ -152,8 +185,13 @@ int main(int argc, char *argv[])
printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ vcpu_alloc_vmx(vm, &guest_gva);
+ else
+ vcpu_alloc_svm(vm, &guest_gva);
+
+ vcpu_args_set(vcpu, 1, guest_gva);
tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
new file mode 100644
index 000000000000..85d3f4cc76f3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+/*
+ * Allocate two VMCB pages for testing. Both pages have different GVAs (shared
+ * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that:
+ * - L2 GPA == L1 GPA for VMCB0.
+ * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1.
+ *
+ * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is
+ * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, depends
+ * on which VMCB is accessed.
+ */
+#define TEST_MEM_SLOT_INDEX 1
+#define TEST_MEM_PAGES 2
+#define TEST_MEM_BASE 0xc0000000
+
+#define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE)
+
+#define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx)
+#define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx)
+
+#define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0)
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code_vmsave(void)
+{
+ asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmload(void)
+{
+ asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory");
+}
+
+static void l2_guest_code_vmcb(int vmcb_idx)
+{
+ wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa);
+ l2_guest_code_vmsave();
+
+ /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */
+ GUEST_SYNC(vmcb_idx);
+
+ l2_guest_code_vmload();
+ GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb);
+
+ /* Reset MSR_KERNEL_GS_BASE */
+ wrmsr(MSR_KERNEL_GS_BASE, 0);
+ l2_guest_code_vmsave();
+
+ vmmcall();
+}
+
+static void l2_guest_code_vmcb0(void)
+{
+ l2_guest_code_vmcb(0);
+}
+
+static void l2_guest_code_vmcb1(void)
+{
+ l2_guest_code_vmcb(1);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ /* Each test case initializes the guest RIP below */
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */
+ svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /* ..SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE cleared.. */
+ svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* ..and SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE set */
+ svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmsave;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE);
+
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmload;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD);
+
+ /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */
+ svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) |
+ BIT_ULL(INTERCEPT_VMLOAD));
+
+ /*
+ * Without SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be
+ * interpreted as an L1 GPA, so VMCB0 should be used.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0;
+ svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ /*
+ * With SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be interpeted as
+ * an L2 GPA, and translated through the NPT to VMCB1.
+ */
+ svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1;
+ svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ gva_t nested_gva = 0;
+ struct vmcb *test_vmcb[2];
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_tdp(vm);
+
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_BASE, TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES, 0);
+
+ for (i = 0; i <= 1; i++) {
+ virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1);
+ test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i));
+ }
+
+ tdp_identity_map_default_memslots(vm);
+
+ /*
+ * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing
+ * whether the L2 GPA is interpreted as an L1 GPA or translated through
+ * the NPT.
+ */
+ TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0));
+ tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ i = uc.args[1];
+ TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i);
+
+ /*
+ * Check that only the expected VMCB has KERNEL_GS_BASE
+ * set to 0xaaaa, and update it to 0xbbbb.
+ */
+ TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa);
+ TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0);
+ test_vmcb[i]->save.kernel_gs_base = 0xbbbb;
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..70950067b989 100644
--- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -32,7 +32,7 @@
#define RETURN_OPCODE 0xC3
/* Call the specified memory address. */
-static void guest_do_CALL(uint64_t target)
+static void guest_do_CALL(u64 target)
{
((void (*)(void)) target)();
}
@@ -46,14 +46,14 @@ static void guest_do_CALL(uint64_t target)
*/
void guest_code(void)
{
- uint64_t hpage_1 = HPAGE_GVA;
- uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
- uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+ u64 hpage_1 = HPAGE_GVA;
+ u64 hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ u64 hpage_3 = hpage_2 + (PAGE_SIZE * 512);
- READ_ONCE(*(uint64_t *)hpage_1);
+ READ_ONCE(*(u64 *)hpage_1);
GUEST_SYNC(1);
- READ_ONCE(*(uint64_t *)hpage_2);
+ READ_ONCE(*(u64 *)hpage_2);
GUEST_SYNC(2);
guest_do_CALL(hpage_1);
@@ -62,10 +62,10 @@ void guest_code(void)
guest_do_CALL(hpage_3);
GUEST_SYNC(4);
- READ_ONCE(*(uint64_t *)hpage_1);
+ READ_ONCE(*(u64 *)hpage_1);
GUEST_SYNC(5);
- READ_ONCE(*(uint64_t *)hpage_3);
+ READ_ONCE(*(u64 *)hpage_3);
GUEST_SYNC(6);
}
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
int actual_pages_2m;
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
TEST_ASSERT(actual_pages_2m == expected_pages_2m,
"Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
int actual_splits;
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
TEST_ASSERT(actual_splits == expected_splits,
"Unexpected NX huge page split count. Expected %d, got %d",
@@ -107,7 +107,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint64_t nr_bytes;
+ u64 nr_bytes;
void *hva;
int r;
diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
index 9cbf283ebc55..80bb07e6531c 100644
--- a/tools/testing/selftests/kvm/x86/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86/platform_info_test.c
@@ -23,8 +23,8 @@
static void guest_code(void)
{
- uint64_t msr_platform_info;
- uint8_t vector;
+ u64 msr_platform_info;
+ u8 vector;
GUEST_SYNC(true);
msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
@@ -42,7 +42,7 @@ int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint64_t msr_platform_info;
+ u64 msr_platform_info;
struct ucall uc;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..dc6afac3aa91 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -14,10 +14,10 @@
#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
/*
- * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
- * 1 LOOP.
+ * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP,
+ * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 3
+#define NUM_INSNS_PER_LOOP 6
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,19 +29,74 @@
/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+/* Track which architectural events are supported by hardware. */
+static u32 hardware_pmu_arch_events;
-static uint8_t kvm_pmu_version;
+static u8 kvm_pmu_version;
static bool kvm_has_perf_caps;
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(u8 idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
- uint8_t pmu_version,
- uint64_t perf_capabilities)
+ u8 pmu_version,
+ u64 perf_capabilities)
{
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -77,7 +132,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
} while (uc.cmd != UCALL_DONE);
}
-static uint8_t guest_get_pmu_version(void)
+static u8 guest_get_pmu_version(void)
{
/*
* Return the effective PMU version, i.e. the minimum between what KVM
@@ -86,7 +141,7 @@ static uint8_t guest_get_pmu_version(void)
* supported by KVM to verify KVM doesn't freak out and do something
* bizarre with an architecturally valid, but unsupported, version.
*/
- return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+ return min_t(u8, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}
/*
@@ -98,22 +153,28 @@ static uint8_t guest_get_pmu_version(void)
* Sanity check that in all cases, the event doesn't count when it's disabled,
* and that KVM correctly emulates the write of an arbitrary value.
*/
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(u8 idx, u32 pmc, u32 pmc_msr)
{
- uint64_t count;
+ u64 count;
count = _rdpmc(pmc);
- if (!this_pmu_has(event))
+ if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
break;
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
+ /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */
+ if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT))
+ GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED);
+ else
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
break;
case INTEL_ARCH_LLC_REFERENCES_INDEX:
case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -123,10 +184,15 @@ static void guest_assert_event_count(uint8_t idx,
fallthrough;
case INTEL_ARCH_CPU_CYCLES_INDEX:
case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX:
+ case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX:
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ case INTEL_ARCH_TOPDOWN_RETIRING_INDEX:
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
break;
default:
break;
@@ -160,83 +226,52 @@ do { \
__asm__ __volatile__("wrmsr\n\t" \
" mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
"1:\n\t" \
+ FEP "enter $0, $0\n\t" \
clflush "\n\t" \
"mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
+ FEP "leave\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
- :: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
+ :: "a"((u32)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
- wrmsr(pmc_msr, 0); \
+ wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
- uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+static void __guest_test_arch_event(u8 idx, u32 pmc, u32 pmc_msr,
+ u32 ctrl_msr, u64 ctrl_msr_value)
{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
-static void guest_test_arch_event(uint8_t idx)
+static void guest_test_arch_event(u8 idx)
{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
- uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
- uint32_t pmu_version = guest_get_pmu_version();
+ u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ u32 pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
bool guest_has_perf_global_ctrl = pmu_version >= 2;
struct kvm_x86_pmu_feature gp_event, fixed_event;
- uint32_t base_pmc_msr;
+ u32 base_pmc_msr;
unsigned int i;
/* The host side shouldn't invoke this without a guest PMU. */
@@ -248,13 +283,13 @@ static void guest_test_arch_event(uint8_t idx)
else
base_pmc_msr = MSR_IA32_PERFCTR0;
- gp_event = intel_event_to_feature[idx].gp_event;
+ gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
for (i = 0; i < nr_gp_counters; i++) {
- uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+ u64 eventsel = ARCH_PERFMON_EVENTSEL_OS |
ARCH_PERFMON_EVENTSEL_ENABLE |
intel_pmu_arch_events[idx];
@@ -262,14 +297,14 @@ static void guest_test_arch_event(uint8_t idx)
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
- fixed_event = intel_event_to_feature[idx].fixed_event;
+ fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
@@ -277,7 +312,7 @@ static void guest_test_arch_event(uint8_t idx)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -285,7 +320,7 @@ static void guest_test_arch_event(uint8_t idx)
static void guest_test_arch_events(void)
{
- uint8_t i;
+ u8 i;
for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
guest_test_arch_event(i);
@@ -293,8 +328,8 @@ static void guest_test_arch_events(void)
GUEST_DONE();
}
-static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t length, uint8_t unavailable_mask)
+static void test_arch_events(u8 pmu_version, u64 perf_capabilities,
+ u8 length, u32 unavailable_mask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -303,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
if (!pmu_version)
return;
+ unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit,
+ X86_PROPERTY_PMU_EVENTS_MASK.lo_bit);
+
vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
pmu_version, perf_capabilities);
@@ -327,19 +365,19 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
- "Expected %s on " #insn "(0x%x), got vector %u", \
- expect_gp ? "#GP" : "no fault", msr, vector) \
+ "Expected %s on " #insn "(0x%x), got %s", \
+ expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected_val, \
+ __GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected_val, val);
+ msr, expected, val);
-static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
- uint64_t expected_val)
+static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success,
+ u64 expected_val)
{
- uint8_t vector;
- uint64_t val;
+ u8 vector;
+ u64 val;
vector = rdpmc_safe(rdpmc_idx, &val);
GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
@@ -355,19 +393,19 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
}
-static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
- uint8_t nr_counters, uint32_t or_mask)
+static void guest_rd_wr_counters(u32 base_msr, u8 nr_possible_counters,
+ u8 nr_counters, u32 or_mask)
{
const bool pmu_has_fast_mode = !guest_get_pmu_version();
- uint8_t i;
+ u8 i;
for (i = 0; i < nr_possible_counters; i++) {
/*
* TODO: Test a value that validates full-width writes and the
* width of the counters.
*/
- const uint64_t test_val = 0xffff;
- const uint32_t msr = base_msr + i;
+ const u64 test_val = 0xffff;
+ const u32 msr = base_msr + i;
/*
* Fixed counters are supported if the counter is less than the
@@ -380,12 +418,12 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
* KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
* unsupported, i.e. doesn't #GP and reads back '0'.
*/
- const uint64_t expected_val = expect_success ? test_val : 0;
+ const u64 expected_val = expect_success ? test_val : 0;
const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
msr != MSR_P6_PERFCTR1;
- uint32_t rdpmc_idx;
- uint8_t vector;
- uint64_t val;
+ u32 rdpmc_idx;
+ u8 vector;
+ u64 val;
vector = wrmsr_safe(msr, test_val);
GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
@@ -423,9 +461,9 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters
static void guest_test_gp_counters(void)
{
- uint8_t pmu_version = guest_get_pmu_version();
- uint8_t nr_gp_counters = 0;
- uint32_t base_msr;
+ u8 pmu_version = guest_get_pmu_version();
+ u8 nr_gp_counters = 0;
+ u32 base_msr;
if (pmu_version)
nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
@@ -439,7 +477,7 @@ static void guest_test_gp_counters(void)
* counters, of which there are none.
*/
if (pmu_version > 1) {
- uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
+ u64 global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL);
if (nr_gp_counters)
GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0));
@@ -457,8 +495,8 @@ static void guest_test_gp_counters(void)
GUEST_DONE();
}
-static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t nr_gp_counters)
+static void test_gp_counters(u8 pmu_version, u64 perf_capabilities,
+ u8 nr_gp_counters)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -476,9 +514,9 @@ static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void guest_test_fixed_counters(void)
{
- uint64_t supported_bitmask = 0;
- uint8_t nr_fixed_counters = 0;
- uint8_t i;
+ u64 supported_bitmask = 0;
+ u8 nr_fixed_counters = 0;
+ u8 i;
/* Fixed counters require Architectural vPMU Version 2+. */
if (guest_get_pmu_version() >= 2)
@@ -495,8 +533,8 @@ static void guest_test_fixed_counters(void)
nr_fixed_counters, supported_bitmask);
for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
- uint8_t vector;
- uint64_t val;
+ u8 vector;
+ u64 val;
if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
@@ -523,9 +561,8 @@ static void guest_test_fixed_counters(void)
GUEST_DONE();
}
-static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
- uint8_t nr_fixed_counters,
- uint32_t supported_bitmask)
+static void test_fixed_counters(u8 pmu_version, u64 perf_capabilities,
+ u8 nr_fixed_counters, u32 supported_bitmask)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -545,40 +582,67 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void test_intel_counters(void)
{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
- uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
- uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ u8 nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ u8 pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
unsigned int i;
- uint8_t v, j;
- uint32_t k;
+ u8 v, j;
+ u32 k;
- const uint64_t perf_caps[] = {
+ const u64 perf_caps[] = {
0,
PMU_CAP_FW_WRITES,
};
/*
+ * To keep the total runtime reasonable, test only a handful of select,
+ * semi-arbitrary values for the mask of unavailable PMU events. Test
+ * 0 (all events available) and all ones (no events available) as well
+ * as alternating bit sequencues, e.g. to detect if KVM is checking the
+ * wrong bit(s).
+ */
+ const u32 unavailable_masks[] = {
+ 0x0,
+ 0xffffffffu,
+ 0xaaaaaaaau,
+ 0x55555555u,
+ 0xf0f0f0f0u,
+ 0x0f0f0f0fu,
+ 0xa0a0a0a0u,
+ 0x0a0a0a0au,
+ 0x50505050u,
+ 0x05050505u,
+ };
+
+ /*
* Test up to PMU v5, which is the current maximum version defined by
* Intel, i.e. is the last version that is guaranteed to be backwards
* compatible with KVM's existing behavior.
*/
- uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+ u8 max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
/*
* Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
*/
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
/*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
*/
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -587,16 +651,7 @@ static void test_intel_counters(void)
pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
v, perf_caps[i]);
- /*
- * To keep the total runtime reasonable, test every
- * possible non-zero, non-reserved bitmap combination
- * only with the native PMU version and the full bit
- * vector length.
- */
- if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
- }
+
/*
* Test single bits for all PMU version and lengths up
* the number of events +1 (to verify KVM doesn't do
@@ -604,12 +659,9 @@ static void test_intel_counters(void)
* host length). Explicitly test a mask of '0' and all
* ones i.e. all events being available and unavailable.
*/
- for (j = 0; j <= nr_arch_events + 1; j++) {
- test_arch_events(v, perf_caps[i], j, 0);
- test_arch_events(v, perf_caps[i], j, 0xff);
-
- for (k = 0; k < nr_arch_events; k++)
- test_arch_events(v, perf_caps[i], j, BIT(k));
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
+ for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++)
+ test_arch_events(v, perf_caps[i], j, unavailable_masks[k]);
}
pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..c1232344fda8 100644
--- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
@@ -53,11 +53,11 @@ static const struct __kvm_pmu_event_filter base_event_filter = {
};
struct {
- uint64_t loads;
- uint64_t stores;
- uint64_t loads_stores;
- uint64_t branches_retired;
- uint64_t instructions_retired;
+ u64 loads;
+ u64 stores;
+ u64 loads_stores;
+ u64 branches_retired;
+ u64 instructions_retired;
} pmc_results;
/*
@@ -75,9 +75,9 @@ static void guest_gp_handler(struct ex_regs *regs)
*
* Return on success. GUEST_SYNC(0) on error.
*/
-static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+static void check_msr(u32 msr, u64 bits_to_flip)
{
- uint64_t v = rdmsr(msr) ^ bits_to_flip;
+ u64 v = rdmsr(msr) ^ bits_to_flip;
wrmsr(msr, v);
if (rdmsr(msr) != v)
@@ -89,10 +89,10 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip)
GUEST_SYNC(-EIO);
}
-static void run_and_measure_loop(uint32_t msr_base)
+static void run_and_measure_loop(u32 msr_base)
{
- const uint64_t branches_retired = rdmsr(msr_base + 0);
- const uint64_t insn_retired = rdmsr(msr_base + 1);
+ const u64 branches_retired = rdmsr(msr_base + 0);
+ const u64 insn_retired = rdmsr(msr_base + 1);
__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
@@ -147,7 +147,7 @@ static void amd_guest_code(void)
* Run the VM to the next GUEST_SYNC(value), and return the value passed
* to the sync. Any other exit from the guest is fatal.
*/
-static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+static u64 run_vcpu_to_sync(struct kvm_vcpu *vcpu)
{
struct ucall uc;
@@ -161,7 +161,7 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
{
- uint64_t r;
+ u64 r;
memset(&pmc_results, 0, sizeof(pmc_results));
sync_global_to_guest(vcpu->vm, pmc_results);
@@ -182,7 +182,7 @@ static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
*/
static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
{
- uint64_t r;
+ u64 r;
vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
r = run_vcpu_to_sync(vcpu);
@@ -195,7 +195,7 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
* Remove the first occurrence of 'event' (if any) from the filter's
* event list.
*/
-static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, u64 event)
{
bool found = false;
int i;
@@ -212,10 +212,12 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
do { \
- uint64_t br = pmc_results.branches_retired; \
- uint64_t ir = pmc_results.instructions_retired; \
+ u64 br = pmc_results.branches_retired; \
+ u64 ir = pmc_results.instructions_retired; \
+ bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \
+ br >= NUM_BRANCHES : br == NUM_BRANCHES; \
\
- if (br && br != NUM_BRANCHES) \
+ if (br && !br_matched) \
pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
__func__, br, NUM_BRANCHES); \
TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
@@ -226,8 +228,8 @@ do { \
#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
do { \
- uint64_t br = pmc_results.branches_retired; \
- uint64_t ir = pmc_results.instructions_retired; \
+ u64 br = pmc_results.branches_retired; \
+ u64 ir = pmc_results.instructions_retired; \
\
TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
__func__, br); \
@@ -359,7 +361,8 @@ static bool use_intel_pmu(void)
*/
static bool use_amd_pmu(void)
{
- return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
+ return (host_cpu_is_amd && kvm_cpu_family() >= 0x17) ||
+ host_cpu_is_hygon;
}
/*
@@ -375,7 +378,7 @@ static bool use_amd_pmu(void)
static bool supports_event_mem_inst_retired(void)
{
- uint32_t eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(1, &eax, &ebx, &ecx, &edx);
if (x86_family(eax) == 0x6) {
@@ -412,15 +415,15 @@ static bool supports_event_mem_inst_retired(void)
#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
-static void masked_events_guest_test(uint32_t msr_base)
+static void masked_events_guest_test(u32 msr_base)
{
/*
* The actual value of the counters don't determine the outcome of
* the test. Only that they are zero or non-zero.
*/
- const uint64_t loads = rdmsr(msr_base + 0);
- const uint64_t stores = rdmsr(msr_base + 1);
- const uint64_t loads_stores = rdmsr(msr_base + 2);
+ const u64 loads = rdmsr(msr_base + 0);
+ const u64 stores = rdmsr(msr_base + 1);
+ const u64 loads_stores = rdmsr(msr_base + 2);
int val;
@@ -473,7 +476,7 @@ static void amd_masked_events_guest_code(void)
}
static void run_masked_events_test(struct kvm_vcpu *vcpu,
- const uint64_t masked_events[],
+ const u64 masked_events[],
const int nmasked_events)
{
struct __kvm_pmu_event_filter f = {
@@ -482,7 +485,7 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
.flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
};
- memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ memcpy(f.events, masked_events, sizeof(u64) * nmasked_events);
test_with_filter(vcpu, &f);
}
@@ -491,12 +494,12 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu,
#define ALLOW_LOADS_STORES BIT(2)
struct masked_events_test {
- uint64_t intel_events[MAX_TEST_EVENTS];
- uint64_t intel_event_end;
- uint64_t amd_events[MAX_TEST_EVENTS];
- uint64_t amd_event_end;
+ u64 intel_events[MAX_TEST_EVENTS];
+ u64 intel_event_end;
+ u64 amd_events[MAX_TEST_EVENTS];
+ u64 amd_event_end;
const char *msg;
- uint32_t flags;
+ u32 flags;
};
/*
@@ -579,9 +582,9 @@ const struct masked_events_test test_cases[] = {
};
static int append_test_events(const struct masked_events_test *test,
- uint64_t *events, int nevents)
+ u64 *events, int nevents)
{
- const uint64_t *evts;
+ const u64 *evts;
int i;
evts = use_intel_pmu() ? test->intel_events : test->amd_events;
@@ -600,7 +603,7 @@ static bool bool_eq(bool a, bool b)
return a == b;
}
-static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, u64 *events,
int nevents)
{
int ntests = ARRAY_SIZE(test_cases);
@@ -627,7 +630,7 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
}
}
-static void add_dummy_events(uint64_t *events, int nevents)
+static void add_dummy_events(u64 *events, int nevents)
{
int i;
@@ -647,7 +650,7 @@ static void add_dummy_events(uint64_t *events, int nevents)
static void test_masked_events(struct kvm_vcpu *vcpu)
{
int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
- uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+ u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
/* Run the test cases against a sparse PMU event filter. */
run_masked_events_tests(vcpu, events, 0);
@@ -665,8 +668,8 @@ static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
}
-static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
- uint32_t flags, uint32_t action)
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event,
+ u32 flags, u32 action)
{
struct __kvm_pmu_event_filter f = {
.nevents = 1,
@@ -682,9 +685,9 @@ static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
static void test_filter_ioctl(struct kvm_vcpu *vcpu)
{
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
struct __kvm_pmu_event_filter f;
- uint64_t e = ~0ul;
+ u64 e = ~0ul;
int r;
/*
@@ -726,7 +729,7 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu)
TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
}
-static void intel_run_fixed_counter_guest_code(uint8_t idx)
+static void intel_run_fixed_counter_guest_code(u8 idx)
{
for (;;) {
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -742,8 +745,8 @@ static void intel_run_fixed_counter_guest_code(uint8_t idx)
}
}
-static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
- uint32_t action, uint32_t bitmap)
+static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ u32 action, u32 bitmap)
{
struct __kvm_pmu_event_filter f = {
.action = action,
@@ -754,9 +757,9 @@ static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
return run_vcpu_to_sync(vcpu);
}
-static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
- uint32_t action,
- uint32_t bitmap)
+static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ u32 action,
+ u32 bitmap)
{
struct __kvm_pmu_event_filter f = base_event_filter;
@@ -767,12 +770,12 @@ static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
return run_vcpu_to_sync(vcpu);
}
-static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
- uint8_t nr_fixed_counters)
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, u8 idx,
+ u8 nr_fixed_counters)
{
unsigned int i;
- uint32_t bitmap;
- uint64_t count;
+ u32 bitmap;
+ u64 count;
TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
"Invalid nr_fixed_counters");
@@ -812,10 +815,10 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
static void test_fixed_counter_bitmap(void)
{
- uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- uint8_t idx;
+ u8 idx;
/*
* Check that pmu_event_filter works as expected when it's applied to
diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..1d2f5d4fd45d 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
@@ -23,13 +23,13 @@
#include <processor.h>
#define BASE_DATA_SLOT 10
-#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
-#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
+#define BASE_DATA_GPA ((u64)(1ull << 32))
+#define PER_CPU_DATA_SIZE ((u64)(SZ_2M + PAGE_SIZE))
/* Horrific macro so that the line info is captured accurately :-( */
#define memcmp_g(gpa, pattern, size) \
do { \
- uint8_t *mem = (uint8_t *)gpa; \
+ u8 *mem = (u8 *)gpa; \
size_t i; \
\
for (i = 0; i < size; i++) \
@@ -38,7 +38,7 @@ do { \
pattern, i, gpa + i, mem[i]); \
} while (0)
-static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+static void memcmp_h(u8 *mem, gpa_t gpa, u8 pattern, size_t size)
{
size_t i;
@@ -70,13 +70,13 @@ enum ucall_syncs {
SYNC_PRIVATE,
};
-static void guest_sync_shared(uint64_t gpa, uint64_t size,
- uint8_t current_pattern, uint8_t new_pattern)
+static void guest_sync_shared(gpa_t gpa, u64 size,
+ u8 current_pattern, u8 new_pattern)
{
GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
}
-static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+static void guest_sync_private(gpa_t gpa, u64 size, u8 pattern)
{
GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
}
@@ -86,10 +86,10 @@ static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
#define MAP_GPA_SHARED BIT(1)
#define MAP_GPA_DO_FALLOCATE BIT(2)
-static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+static void guest_map_mem(gpa_t gpa, u64 size, bool map_shared,
bool do_fallocate)
{
- uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+ u64 flags = MAP_GPA_SET_ATTRIBUTES;
if (map_shared)
flags |= MAP_GPA_SHARED;
@@ -98,19 +98,19 @@ static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
kvm_hypercall_map_gpa_range(gpa, size, flags);
}
-static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+static void guest_map_shared(gpa_t gpa, u64 size, bool do_fallocate)
{
guest_map_mem(gpa, size, true, do_fallocate);
}
-static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+static void guest_map_private(gpa_t gpa, u64 size, bool do_fallocate)
{
guest_map_mem(gpa, size, false, do_fallocate);
}
struct {
- uint64_t offset;
- uint64_t size;
+ u64 offset;
+ u64 size;
} static const test_ranges[] = {
GUEST_STAGE(0, PAGE_SIZE),
GUEST_STAGE(0, SZ_2M),
@@ -119,11 +119,11 @@ struct {
GUEST_STAGE(SZ_2M, PAGE_SIZE),
};
-static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate)
{
- const uint8_t def_p = 0xaa;
- const uint8_t init_p = 0xcc;
- uint64_t j;
+ const u8 def_p = 0xaa;
+ const u8 init_p = 0xcc;
+ u64 j;
int i;
/* Memory should be shared by default. */
@@ -134,12 +134,12 @@ static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
- uint64_t gpa = base_gpa + test_ranges[i].offset;
- uint64_t size = test_ranges[i].size;
- uint8_t p1 = 0x11;
- uint8_t p2 = 0x22;
- uint8_t p3 = 0x33;
- uint8_t p4 = 0x44;
+ gpa_t gpa = base_gpa + test_ranges[i].offset;
+ u64 size = test_ranges[i].size;
+ u8 p1 = 0x11;
+ u8 p2 = 0x22;
+ u8 p3 = 0x33;
+ u8 p4 = 0x44;
/*
* Set the test region to pattern one to differentiate it from
@@ -214,10 +214,10 @@ skip:
}
}
-static void guest_punch_hole(uint64_t gpa, uint64_t size)
+static void guest_punch_hole(gpa_t gpa, u64 size)
{
/* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
- uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+ u64 flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
kvm_hypercall_map_gpa_range(gpa, size, flags);
}
@@ -227,9 +227,9 @@ static void guest_punch_hole(uint64_t gpa, uint64_t size)
* proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
* (subsequent fault) should zero memory.
*/
-static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+static void guest_test_punch_hole(u64 base_gpa, bool precise)
{
- const uint8_t init_p = 0xcc;
+ const u8 init_p = 0xcc;
int i;
/*
@@ -239,8 +239,8 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
- uint64_t gpa = base_gpa + test_ranges[i].offset;
- uint64_t size = test_ranges[i].size;
+ gpa_t gpa = base_gpa + test_ranges[i].offset;
+ u64 size = test_ranges[i].size;
/*
* Free all memory before each iteration, even for the !precise
@@ -268,7 +268,7 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
}
}
-static void guest_code(uint64_t base_gpa)
+static void guest_code(u64 base_gpa)
{
/*
* Run the conversion test twice, with and without doing fallocate() on
@@ -289,8 +289,8 @@ static void guest_code(uint64_t base_gpa)
static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- uint64_t gpa = run->hypercall.args[0];
- uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+ gpa_t gpa = run->hypercall.args[0];
+ u64 size = run->hypercall.args[1] * PAGE_SIZE;
bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
@@ -337,7 +337,7 @@ static void *__test_mem_conversions(void *__vcpu)
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
case UCALL_SYNC: {
- uint64_t gpa = uc.args[1];
+ gpa_t gpa = uc.args[1];
size_t size = uc.args[2];
size_t i;
@@ -347,7 +347,7 @@ static void *__test_mem_conversions(void *__vcpu)
for (i = 0; i < size; i += vm->page_size) {
size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
- uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+ u8 *hva = addr_gpa2hva(vm, gpa + i);
/* In all cases, the host should observe the shared data. */
memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
@@ -366,8 +366,8 @@ static void *__test_mem_conversions(void *__vcpu)
}
}
-static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
- uint32_t nr_memslots)
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_vcpus,
+ u32 nr_memslots)
{
/*
* Allocate enough memory so that each vCPU's chunk of memory can be
@@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
pthread_t threads[KVM_MAX_VCPUS];
struct kvm_vm *vm;
- int memfd, i, r;
+ int memfd, i;
const struct vm_shape shape = {
.mode = VM_MODE_DEFAULT,
@@ -402,7 +402,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
for (i = 0; i < nr_vcpus; i++) {
- uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
+ gpa_t gpa = BASE_DATA_GPA + i * per_cpu_size;
vcpu_args_set(vcpus[i], 1, gpa);
@@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t
* should prevent the VM from being fully destroyed until the last
* reference to the guest_memfd is also put.
*/
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
-
- r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
- TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
close(memfd);
}
@@ -453,8 +450,8 @@ static void usage(const char *cmd)
int main(int argc, char *argv[])
{
enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
- uint32_t nr_memslots = 1;
- uint32_t nr_vcpus = 1;
+ u32 nr_memslots = 1;
+ u32 nr_vcpus = 1;
int opt;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
index 13e72fcec8dd..10db9fe6d906 100644
--- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
@@ -17,17 +17,17 @@
#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
#define EXITS_TEST_SLOT 10
-static uint64_t guest_repeatedly_read(void)
+static u64 guest_repeatedly_read(void)
{
- volatile uint64_t value;
+ volatile u64 value;
while (true)
- value = *((uint64_t *) EXITS_TEST_GVA);
+ value = *((u64 *)EXITS_TEST_GVA);
return value;
}
-static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+static u32 run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
{
int r;
@@ -50,7 +50,7 @@ static void test_private_access_memslot_deleted(void)
struct kvm_vcpu *vcpu;
pthread_t vm_thread;
void *thread_return;
- uint32_t exit_reason;
+ u32 exit_reason;
vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
guest_repeatedly_read);
@@ -72,7 +72,7 @@ static void test_private_access_memslot_deleted(void)
vm_mem_region_delete(vm, EXITS_TEST_SLOT);
pthread_join(vm_thread, &thread_return);
- exit_reason = (uint32_t)(uint64_t)thread_return;
+ exit_reason = (u32)(u64)thread_return;
TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
@@ -86,7 +86,7 @@ static void test_private_access_memslot_not_private(void)
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- uint32_t exit_reason;
+ u32 exit_reason;
vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
guest_repeatedly_read);
diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
index 49913784bc82..8e3898646c69 100644
--- a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
@@ -86,11 +86,11 @@ static void run_vcpu(struct kvm_vcpu *vcpu)
}
}
-static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+static struct kvm_vm *create_vm(u32 nr_vcpus, u32 bsp_vcpu_id,
struct kvm_vcpu *vcpus[])
{
struct kvm_vm *vm;
- uint32_t i;
+ u32 i;
vm = vm_create(nr_vcpus);
@@ -104,7 +104,7 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
return vm;
}
-static void run_vm_bsp(uint32_t bsp_vcpu_id)
+static void run_vm_bsp(u32 bsp_vcpu_id)
{
struct kvm_vcpu *vcpus[2];
struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index f4095a3d1278..8e654cc9ab16 100644
--- a/tools/testing/selftests/kvm/x86/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -46,9 +46,9 @@ do { \
X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
-static uint64_t calc_supported_cr4_feature_bits(void)
+static u64 calc_supported_cr4_feature_bits(void)
{
- uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
+ u64 cr4 = KVM_ALWAYS_ALLOWED_CR4;
if (kvm_cpu_has(X86_FEATURE_UMIP))
cr4 |= X86_CR4_UMIP;
@@ -74,7 +74,7 @@ static uint64_t calc_supported_cr4_feature_bits(void)
return cr4;
}
-static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
+static void test_cr_bits(struct kvm_vcpu *vcpu, u64 cr4)
{
struct kvm_sregs sregs;
int rc, i;
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..8eeba2327c7c 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,12 +28,13 @@
int kvm_fd;
u64 supported_vmsa_features;
bool have_sev_es;
+bool have_snp;
static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
- .data = (uint64_t)data,
+ .data = (u64)data,
.sev_fd = open_sev_dev_path_or_exit(),
};
int ret;
@@ -83,6 +84,9 @@ void test_vm_types(void)
if (have_sev_es)
test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
test_init2_invalid(0, &(struct kvm_sev_init){},
"VM type is KVM_X86_DEFAULT_VM");
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -90,7 +94,7 @@ void test_vm_types(void)
"VM type is KVM_X86_SW_PROTECTED_VM");
}
-void test_flags(uint32_t vm_type)
+void test_flags(u32 vm_type)
{
int i;
@@ -100,7 +104,7 @@ void test_flags(uint32_t vm_type)
"invalid flag");
}
-void test_features(uint32_t vm_type, uint64_t supported_features)
+void test_features(u32 vm_type, u64 supported_features)
{
int i;
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
"sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+ "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
test_vm_types();
test_flags(KVM_X86_SEV_VM);
if (have_sev_es)
test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
test_features(KVM_X86_SEV_VM, 0);
if (have_sev_es)
test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 0a6dfba3905b..6b0928e69051 100644
--- a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
@@ -36,8 +36,6 @@ static struct kvm_vm *sev_vm_create(bool es)
sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
- if (es)
- vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
return vm;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index d97816dc476a..1a49ee391586 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -13,21 +13,61 @@
#include "linux/psp-sev.h"
#include "sev.h"
+static void guest_sev_test_msr(u32 msr)
+{
+ u64 val = rdmsr(msr);
+
+ wrmsr(msr, val);
+ GUEST_ASSERT(val == rdmsr(msr));
+}
+
+#define guest_sev_test_reg(reg) \
+do { \
+ u64 val = get_##reg(); \
+ \
+ set_##reg(val); \
+ GUEST_ASSERT(val == get_##reg()); \
+} while (0)
+
+static void guest_sev_test_regs(void)
+{
+ guest_sev_test_msr(MSR_EFER);
+ guest_sev_test_reg(cr0);
+ guest_sev_test_reg(cr3);
+ guest_sev_test_reg(cr4);
+ guest_sev_test_reg(cr8);
+}
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+static void guest_snp_code(void)
+{
+ u64 sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ guest_sev_test_regs();
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+ guest_sev_test_regs();
+
/*
* TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
* force "termination" to signal "done" via the GHCB MSR protocol.
*/
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
+ vmgexit();
}
static void guest_sev_code(void)
@@ -35,6 +75,8 @@ static void guest_sev_code(void)
GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ guest_sev_test_regs();
+
GUEST_DONE();
}
@@ -62,19 +104,19 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
abort();
}
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(u32 type, u64 policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t gva;
+ gva_t gva;
void *hva;
double x87val = M_PI;
struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
- gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
- MEM_REGION_TEST_DATA);
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
+ gva = vm_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+ MEM_REGION_TEST_DATA);
hva = addr_gva2hva(vm, gva);
vcpu_args_set(vcpu, 1, gva);
@@ -89,10 +131,10 @@ static void test_sync_vmsa(uint32_t policy)
: "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
vcpu_xsave_set(vcpu, &xsave);
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+ vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
- memset(hva, 0, 4096);
+ memset(hva, 0, PAGE_SIZE);
vcpu_run(vcpu);
@@ -108,14 +150,12 @@ static void test_sync_vmsa(uint32_t policy)
kvm_vm_free(vm);
}
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, u32 type, u64 policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
/* TODO: Validate the measurement is as expected. */
@@ -124,7 +164,7 @@ static void test_sev(void *guest_code, uint64_t policy)
for (;;) {
vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) {
+ if (is_sev_es_vm(vm)) {
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Wanted SYSTEM_EVENT, got %s",
exit_reason_str(vcpu->run->exit_reason));
@@ -161,16 +201,14 @@ static void guest_shutdown_code(void)
__asm__ __volatile__("ud2");
}
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(u32 type, u64 policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint32_t type = KVM_X86_SEV_ES_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+ vm_sev_launch(vm, policy, NULL);
vcpu_run(vcpu);
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -180,27 +218,42 @@ static void test_sev_es_shutdown(void)
kvm_vm_free(vm);
}
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, u32 type, u64 policy)
{
const u64 xf_mask = XFEATURE_MASK_X87_AVX;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ if (type == KVM_X86_SEV_VM)
+ return;
- test_sev_es_shutdown();
+ test_sev_shutdown(type, policy);
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
}
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..3dca85e95478 100644
--- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
@@ -20,8 +20,8 @@
static void guest_code(bool tdp_enabled)
{
- uint64_t error_code;
- uint64_t vector;
+ u64 error_code;
+ u64 vector;
vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
@@ -47,9 +47,8 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint64_t *pte;
- uint64_t *hva;
- uint64_t gpa;
+ u64 *hva;
+ gpa_t gpa;
int rc;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
@@ -73,8 +72,7 @@ int main(int argc, char *argv[])
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, PAGE_SIZE);
- pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
- *pte |= BIT_ULL(MAXPHYADDR);
+ *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR);
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..740051167dbd 100644
--- a/tools/testing/selftests/kvm/x86/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
@@ -14,13 +14,11 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "smm.h"
#include "vmx.h"
#include "svm_util.h"
-#define SMRAM_SIZE 65536
-#define SMRAM_MEMSLOT ((1 << 16) | 1)
-#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
#define SMRAM_GPA 0x1000000
#define SMRAM_STAGE 0xfe
@@ -36,13 +34,13 @@
* independent subset of asm here.
* SMI handler always report back fixed stage SMRAM_STAGE.
*/
-uint8_t smi_handler[] = {
+u8 smi_handler[] = {
0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */
0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */
0x0f, 0xaa, /* rsm */
};
-static inline void sync_with_host(uint64_t phase)
+static inline void sync_with_host(u64 phase)
{
asm volatile("in $" XSTR(SYNC_PORT)", %%al \n"
: "+a" (phase));
@@ -67,7 +65,7 @@ static void guest_code(void *arg)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ u64 apicbase = rdmsr(MSR_IA32_APICBASE);
struct svm_test_data *svm = arg;
struct vmx_pages *vmx_pages = arg;
@@ -113,21 +111,9 @@ static void guest_code(void *arg)
sync_with_host(DONE);
}
-void inject_smi(struct kvm_vcpu *vcpu)
-{
- struct kvm_vcpu_events events;
-
- vcpu_events_get(vcpu, &events);
-
- events.smi.pending = 1;
- events.flags |= KVM_VCPUEVENT_VALID_SMM;
-
- vcpu_events_set(vcpu, &events);
-}
-
int main(int argc, char *argv[])
{
- vm_vaddr_t nested_gva = 0;
+ gva_t nested_gva = 0;
struct kvm_vcpu *vcpu;
struct kvm_regs regs;
@@ -140,16 +126,7 @@ int main(int argc, char *argv[])
/* Create VM */
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
- SMRAM_MEMSLOT, SMRAM_PAGES, 0);
- TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
- == SMRAM_GPA, "could not allocate guest physical addresses?");
-
- memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE);
- memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
- sizeof(smi_handler));
-
- vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
+ setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler));
if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
if (kvm_cpu_has(X86_FEATURE_SVM))
diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..409c6cc9f921 100644
--- a/tools/testing/selftests/kvm/x86/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
@@ -26,7 +26,9 @@ void svm_l2_guest_code(void)
GUEST_SYNC(4);
/* Exit to L1 */
vmcall();
+ clgi();
GUEST_SYNC(6);
+ stgi();
/* Done, exit to L1 and never come back. */
vmcall();
}
@@ -41,6 +43,8 @@ static void svm_l1_guest_code(struct svm_test_data *svm)
generic_svm_setup(svm, svm_l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK);
+
GUEST_SYNC(3);
run_guest(vmcb, svm->vmcb_gpa);
GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
@@ -140,8 +144,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
GUEST_SYNC(1);
if (this_cpu_has(X86_FEATURE_XSAVE)) {
- uint64_t supported_xcr0 = this_cpu_supported_xcr0();
- uint8_t buffer[4096];
+ u64 supported_xcr0 = this_cpu_supported_xcr0();
+ u8 buffer[PAGE_SIZE];
memset(buffer, 0xcc, sizeof(buffer));
@@ -168,8 +172,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
}
if (this_cpu_has(X86_FEATURE_MPX)) {
- uint64_t bounds[2] = { 10, 0xffffffffull };
- uint64_t output[2] = { };
+ u64 bounds[2] = { 10, 0xffffffffull };
+ u64 output[2] = { };
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
@@ -222,10 +226,39 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
GUEST_DONE();
}
+void svm_check_nested_state(int stage, struct kvm_x86_state *state)
+{
+ struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm;
+
+ if (kvm_cpu_has(X86_FEATURE_VGIF)) {
+ if (stage == 4)
+ TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1);
+ if (stage == 6)
+ TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0);
+ }
+
+ if (kvm_cpu_has(X86_FEATURE_NRIPS)) {
+ /*
+ * GUEST_SYNC() causes IO emulation in KVM, in which case the
+ * RIP is advanced before exiting to userspace. Hence, the RIP
+ * in the saved state should be the same as nRIP saved by the
+ * CPU in the VMCB.
+ */
+ if (stage == 6)
+ TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip);
+ }
+}
+
+void check_nested_state(int stage, struct kvm_x86_state *state)
+{
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM))
+ svm_check_nested_state(stage, state);
+}
+
int main(int argc, char *argv[])
{
- uint64_t *xstate_bv, saved_xstate_bv;
- vm_vaddr_t nested_gva = 0;
+ u64 *xstate_bv, saved_xstate_bv;
+ gva_t nested_gva = 0;
struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
struct kvm_vcpu *vcpu, *vcpuN;
@@ -278,6 +311,8 @@ int main(int argc, char *argv[])
kvm_vm_release(vm);
+ check_nested_state(stage, state);
+
/* Restore state in a new VM. */
vcpu = vm_recreate_with_one_vcpu(vm);
vcpu_load_state(vcpu, state);
@@ -296,7 +331,7 @@ int main(int argc, char *argv[])
* supported features, even if something goes awry in saving
* the original snapshot.
*/
- xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ xstate_bv = (void *)&((u8 *)state->xsave->region)[512];
saved_xstate_bv = *xstate_bv;
vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..d3cc5e4f7883 100644
--- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
@@ -85,7 +82,7 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
+ gva_t svm_gva;
struct kvm_vm *vm;
struct ucall uc;
diff --git a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
new file mode 100644
index 000000000000..7fbfaa054c95
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google, Inc.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define DO_BRANCH() do { asm volatile("jmp 1f\n 1: nop"); } while (0)
+
+struct lbr_branch {
+ u64 from, to;
+};
+
+volatile struct lbr_branch l2_branch;
+
+#define RECORD_AND_CHECK_BRANCH(b) \
+do { \
+ wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR); \
+ DO_BRANCH(); \
+ (b)->from = rdmsr(MSR_IA32_LASTBRANCHFROMIP); \
+ (b)->to = rdmsr(MSR_IA32_LASTBRANCHTOIP); \
+ /* Disable LBR right after to avoid overriding the IPs */ \
+ wrmsr(MSR_IA32_DEBUGCTLMSR, 0); \
+ \
+ GUEST_ASSERT_NE((b)->from, 0); \
+ GUEST_ASSERT_NE((b)->to, 0); \
+} while (0)
+
+#define CHECK_BRANCH_MSRS(b) \
+do { \
+ GUEST_ASSERT_EQ((b)->from, rdmsr(MSR_IA32_LASTBRANCHFROMIP)); \
+ GUEST_ASSERT_EQ((b)->to, rdmsr(MSR_IA32_LASTBRANCHTOIP)); \
+} while (0)
+
+#define CHECK_BRANCH_VMCB(b, vmcb) \
+do { \
+ GUEST_ASSERT_EQ((b)->from, vmcb->save.br_from); \
+ GUEST_ASSERT_EQ((b)->to, vmcb->save.br_to); \
+} while (0)
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* Record a branch, trigger save/restore, and make sure LBRs are intact */
+ RECORD_AND_CHECK_BRANCH(&l2_branch);
+ GUEST_SYNC(true);
+ CHECK_BRANCH_MSRS(&l2_branch);
+ vmmcall();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, bool nested_lbrv)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct lbr_branch l1_branch;
+
+ /* Record a branch, trigger save/restore, and make sure LBRs are intact */
+ RECORD_AND_CHECK_BRANCH(&l1_branch);
+ GUEST_SYNC(true);
+ CHECK_BRANCH_MSRS(&l1_branch);
+
+ /* Run L2, which will also do the same */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ if (nested_lbrv)
+ vmcb->control.misc_ctl2 = SVM_MISC2_ENABLE_V_LBR;
+ else
+ vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_LBR;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+
+ /* Trigger save/restore one more time before checking, just for kicks */
+ GUEST_SYNC(true);
+
+ /*
+ * If LBR_CTL_ENABLE is set, L1 and L2 should have separate LBR MSRs, so
+ * expect L1's LBRs to remain intact and L2 LBRs to be in the VMCB.
+ * Otherwise, the MSRs are shared between L1 & L2 so expect L2's LBRs.
+ */
+ if (nested_lbrv) {
+ CHECK_BRANCH_MSRS(&l1_branch);
+ CHECK_BRANCH_VMCB(&l2_branch, vmcb);
+ } else {
+ CHECK_BRANCH_MSRS(&l2_branch);
+ }
+ GUEST_DONE();
+}
+
+void test_lbrv_nested_state(bool nested_lbrv)
+{
+ struct kvm_x86_state *state = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ gva_t svm_gva;
+
+ pr_info("Testing with nested LBRV %s\n", nested_lbrv ? "enabled" : "disabled");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 2, svm_gva, nested_lbrv);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ /* Save the vCPU state and restore it in a new VM on sync */
+ pr_info("Guest triggered save/restore.\n");
+ state = vcpu_save_state(vcpu);
+ kvm_vm_release(vm);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_is_lbrv_enabled());
+
+ test_lbrv_nested_state(/*nested_lbrv=*/false);
+ test_lbrv_nested_state(/*nested_lbrv=*/true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
new file mode 100644
index 000000000000..6a89eaffc657
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ unsigned long efer = rdmsr(MSR_EFER);
+
+ /* generic_svm_setup() initializes EFER_SVME set for L2 */
+ GUEST_ASSERT(efer & EFER_SVME);
+ wrmsr(MSR_EFER, efer & ~EFER_SVME);
+
+ /* Unreachable, L1 should be shutdown */
+ GUEST_ASSERT(0);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+
+ /* Unreachable, L1 should be shutdown */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ gva_t nested_gva = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
index 00135cbba35e..c6ea3d609a62 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
@@ -42,7 +42,7 @@ static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
+ gva_t svm_gva;
struct kvm_vm *vm;
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..f72f11d4c4f8 100644
--- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
@@ -76,7 +76,7 @@ static void l2_guest_code_nmi(void)
ud2();
}
-static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+static void l1_guest_code(struct svm_test_data *svm, u64 is_nmi, u64 idt_alt)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
- "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
@@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i
run_guest(vmcb, svm->vmcb_gpa);
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
- "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
@@ -144,8 +144,8 @@ static void run_test(bool is_nmi)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- vm_vaddr_t svm_gva;
- vm_vaddr_t idt_alt_vm;
+ gva_t svm_gva;
+ gva_t idt_alt_vm;
struct kvm_guest_debug debug;
pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
@@ -161,14 +161,14 @@ static void run_test(bool is_nmi)
if (!is_nmi) {
void *idt, *idt_alt;
- idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt_vm = vm_alloc_page(vm);
idt_alt = addr_gva2hva(vm, idt_alt_vm);
idt = addr_gva2hva(vm, vm->arch.idt);
memcpy(idt_alt, idt, getpagesize());
} else {
idt_alt_vm = 0;
}
- vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+ vcpu_args_set(vcpu, 3, svm_gva, (u64)is_nmi, (u64)idt_alt_vm);
memset(&debug, 0, sizeof(debug));
vcpu_guest_debug_set(vcpu, &debug);
diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
new file mode 100644
index 000000000000..a4935ce2fb99
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2026, Google LLC.
+ */
+#include "kvm_util.h"
+#include "vmx.h"
+#include "svm_util.h"
+#include "kselftest.h"
+#include "kvm_test_harness.h"
+#include "test_util.h"
+
+
+#define L2_GUEST_STACK_SIZE 64
+
+#define SYNC_GP 101
+#define SYNC_L2_STARTED 102
+
+static unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void l2_code(void)
+{
+ GUEST_SYNC(SYNC_L2_STARTED);
+ vmcall();
+}
+
+static void l1_vmrun(struct svm_test_data *svm, gpa_t gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmrun %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmload(struct svm_test_data *svm, gpa_t gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmload %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmsave(struct svm_test_data *svm, gpa_t gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ asm volatile ("vmsave %[gpa]" : : [gpa] "a" (gpa) : "memory");
+}
+
+static void l1_vmexit(struct svm_test_data *svm, gpa_t gpa)
+{
+ generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+static u64 unmappable_gpa(struct kvm_vcpu *vcpu)
+{
+ struct userspace_mem_region *region;
+ u64 region_gpa_end, vm_gpa_end = 0;
+ int i;
+
+ hash_for_each(vcpu->vm->regions.slot_hash, i, region, slot_node) {
+ region_gpa_end = region->region.guest_phys_addr + region->region.memory_size;
+ vm_gpa_end = max(vm_gpa_end, region_gpa_end);
+ }
+
+ return vm_gpa_end;
+}
+
+static void test_invalid_vmcb12(struct kvm_vcpu *vcpu)
+{
+ gva_t nested_gva = 0;
+ struct ucall uc;
+
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, -1ULL);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_GP);
+}
+
+static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu)
+{
+ gva_t nested_gva = 0;
+
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT_EQ(vcpu->run->emulation_failure.suberror, KVM_INTERNAL_ERROR_EMULATION);
+}
+
+static void test_unmappable_vmcb12_vmexit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_x86_state *state;
+ gva_t nested_gva = 0;
+ struct ucall uc;
+
+ /*
+ * Enter L2 (with a legit vmcb12 GPA), then overwrite vmcb12 GPA with an
+ * unmappable GPA. KVM will fail to map vmcb12 on nested VM-Exit and
+ * cause a shutdown.
+ */
+ vcpu_alloc_svm(vcpu->vm, &nested_gva);
+ vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], SYNC_L2_STARTED);
+
+ state = vcpu_save_state(vcpu);
+ state->nested.hdr.svm.vmcb_pa = unmappable_gpa(vcpu);
+ vcpu_load_state(vcpu, state);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmcb12_gpa);
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_invalid, l1_vmrun)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_invalid, l1_vmload)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_invalid, l1_vmsave)
+{
+ test_invalid_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_unmappable, l1_vmrun)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_unmappable, l1_vmload)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_unmappable, l1_vmsave)
+{
+ test_unmappable_vmcb12(vcpu);
+}
+
+/*
+ * Invalid vmcb12_gpa cannot be test for #VMEXIT as KVM_SET_NESTED_STATE will
+ * reject it.
+ */
+KVM_ONE_VCPU_TEST(vmcb12_gpa, vmexit_unmappable, l1_vmexit)
+{
+ test_unmappable_vmcb12_vmexit(vcpu);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
index 8a62cca28cfb..b1887242f3b8 100644
--- a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
@@ -36,7 +36,7 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
- vm_vaddr_t svm_gva;
+ gva_t svm_gva;
struct kvm_vm *vm;
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
index 8fa3948b0170..e0c52321f87c 100644
--- a/tools/testing/selftests/kvm/x86/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
@@ -20,7 +20,7 @@
#include "kvm_util.h"
#include "processor.h"
-#define UCALL_PIO_PORT ((uint16_t)0x1000)
+#define UCALL_PIO_PORT ((u16)0x1000)
struct ucall uc_none = {
.cmd = UCALL_NONE,
diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
index 56306a19144a..f1c488e0d497 100644
--- a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
@@ -72,13 +72,13 @@ int main(void)
if (has_vmx) {
- vm_vaddr_t vmx_pages_gva;
+ gva_t vmx_pages_gva;
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vcpu, 1, vmx_pages_gva);
} else {
- vm_vaddr_t svm_gva;
+ gva_t svm_gva;
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
vcpu_alloc_svm(vm, &svm_gva);
diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
index 12b0964f4f13..91583969a14f 100644
--- a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
@@ -95,7 +95,7 @@ int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint64_t val;
+ u64 val;
ksft_print_header();
ksft_set_plan(5);
diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
index 59c7304f805e..59da8d4da607 100644
--- a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
@@ -21,10 +21,10 @@ pthread_spinlock_t create_lock;
#define TEST_TSC_KHZ 2345678UL
#define TEST_TSC_OFFSET 200000000
-uint64_t tsc_sync;
+u64 tsc_sync;
static void guest_code(void)
{
- uint64_t start_tsc, local_tsc, tmp;
+ u64 start_tsc, local_tsc, tmp;
start_tsc = rdtsc();
do {
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..df1ec8209c76 100644
--- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -45,7 +45,7 @@
#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
-static uint64_t supported_mcg_caps;
+static u64 supported_mcg_caps;
/*
* Record states about the injected UCNA.
@@ -53,30 +53,30 @@ static uint64_t supported_mcg_caps;
* handler. Variables without the 'i_' prefixes are recorded in guest main
* execution thread.
*/
-static volatile uint64_t i_ucna_rcvd;
-static volatile uint64_t i_ucna_addr;
-static volatile uint64_t ucna_addr;
-static volatile uint64_t ucna_addr2;
+static volatile u64 i_ucna_rcvd;
+static volatile u64 i_ucna_addr;
+static volatile u64 ucna_addr;
+static volatile u64 ucna_addr2;
struct thread_params {
struct kvm_vcpu *vcpu;
- uint64_t *p_i_ucna_rcvd;
- uint64_t *p_i_ucna_addr;
- uint64_t *p_ucna_addr;
- uint64_t *p_ucna_addr2;
+ u64 *p_i_ucna_rcvd;
+ u64 *p_i_ucna_addr;
+ u64 *p_ucna_addr;
+ u64 *p_ucna_addr2;
};
static void verify_apic_base_addr(void)
{
- uint64_t msr = rdmsr(MSR_IA32_APICBASE);
- uint64_t base = GET_APIC_BASE(msr);
+ u64 msr = rdmsr(MSR_IA32_APICBASE);
+ u64 base = GET_APIC_BASE(msr);
GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}
static void ucna_injection_guest_code(void)
{
- uint64_t ctl2;
+ u64 ctl2;
verify_apic_base_addr();
xapic_enable();
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
/* Enables interrupt in guest. */
- asm volatile("sti");
+ sti();
/* Let user space inject the first UCNA */
GUEST_SYNC(SYNC_FIRST_UCNA);
@@ -106,7 +106,7 @@ static void ucna_injection_guest_code(void)
static void cmci_disabled_guest_code(void)
{
- uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
GUEST_DONE();
@@ -114,7 +114,7 @@ static void cmci_disabled_guest_code(void)
static void cmci_enabled_guest_code(void)
{
- uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
GUEST_DONE();
@@ -145,14 +145,15 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
printf("vCPU received GP in guest.\n");
}
-static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+static void inject_ucna(struct kvm_vcpu *vcpu, u64 addr)
+{
/*
* A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
* the IA32_MCi_STATUS register.
* MSCOD=1 (BIT[16] - MscodDataRdErr).
* MCACOD=0x0090 (Memory controller error format, channel 0)
*/
- uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ u64 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
struct kvm_x86_mce mce = {};
mce.status = status;
@@ -216,10 +217,10 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par
{
struct kvm_vm *vm = vcpu->vm;
params->vcpu = vcpu;
- params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
- params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
- params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
- params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+ params->p_i_ucna_rcvd = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_addr);
+ params->p_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr);
+ params->p_ucna_addr2 = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr2);
run_ucna_injection(params);
@@ -242,7 +243,7 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par
static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
{
- uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ u64 mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
if (enable_cmci_p)
mcg_caps |= MCG_CMCI_P;
@@ -250,7 +251,7 @@ static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
}
-static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, u32 vcpuid,
bool enable_cmci_p, void *guest_code)
{
struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..9c5a87576c2e 100644
--- a/tools/testing/selftests/kvm/x86/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
@@ -10,7 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
-static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+static void guest_ins_port80(u8 *buffer, unsigned int count)
{
unsigned long end;
@@ -26,7 +26,7 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count)
static void guest_code(void)
{
- uint8_t buffer[8192];
+ u8 buffer[8192];
int i;
/*
@@ -85,7 +85,7 @@ int main(int argc, char *argv[])
regs.rcx = 1;
if (regs.rcx == 3)
regs.rcx = 8192;
- memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE);
vcpu_regs_set(vcpu, &regs);
}
diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..2808ce727e5f 100644
--- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
@@ -23,21 +23,21 @@ struct kvm_msr_filter filter_allow = {
.nmsrs = 1,
/* Test an MSR the kernel knows about. */
.base = MSR_IA32_XSS,
- .bitmap = (uint8_t*)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
}, {
.flags = KVM_MSR_FILTER_READ |
KVM_MSR_FILTER_WRITE,
.nmsrs = 1,
/* Test an MSR the kernel doesn't know about. */
.base = MSR_IA32_FLUSH_CMD,
- .bitmap = (uint8_t*)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
}, {
.flags = KVM_MSR_FILTER_READ |
KVM_MSR_FILTER_WRITE,
.nmsrs = 1,
/* Test a fabricated MSR that no one knows about. */
.base = MSR_NON_EXISTENT,
- .bitmap = (uint8_t*)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
},
},
};
@@ -49,7 +49,7 @@ struct kvm_msr_filter filter_fs = {
.flags = KVM_MSR_FILTER_READ,
.nmsrs = 1,
.base = MSR_FS_BASE,
- .bitmap = (uint8_t*)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
},
},
};
@@ -61,12 +61,12 @@ struct kvm_msr_filter filter_gs = {
.flags = KVM_MSR_FILTER_READ,
.nmsrs = 1,
.base = MSR_GS_BASE,
- .bitmap = (uint8_t*)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
},
},
};
-static uint64_t msr_non_existent_data;
+static u64 msr_non_existent_data;
static int guest_exception_count;
static u32 msr_reads, msr_writes;
@@ -77,7 +77,7 @@ static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
static u8 bitmap_deadbeef[1] = { 0x1 };
-static void deny_msr(uint8_t *bitmap, u32 msr)
+static void deny_msr(u8 *bitmap, u32 msr)
{
u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
@@ -142,26 +142,26 @@ struct kvm_msr_filter no_filter_deny = {
* Note: Force test_rdmsr() to not be inlined to prevent the labels,
* rdmsr_start and rdmsr_end, from being defined multiple times.
*/
-static noinline uint64_t test_rdmsr(uint32_t msr)
+static noinline u64 test_rdmsr(u32 msr)
{
- uint32_t a, d;
+ u32 a, d;
guest_exception_count = 0;
__asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
"=a"(a), "=d"(d) : "c"(msr) : "memory");
- return a | ((uint64_t) d << 32);
+ return a | ((u64)d << 32);
}
/*
* Note: Force test_wrmsr() to not be inlined to prevent the labels,
* wrmsr_start and wrmsr_end, from being defined multiple times.
*/
-static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+static noinline void test_wrmsr(u32 msr, u64 value)
{
- uint32_t a = value;
- uint32_t d = value >> 32;
+ u32 a = value;
+ u32 d = value >> 32;
guest_exception_count = 0;
@@ -176,26 +176,26 @@ extern char wrmsr_start, wrmsr_end;
* Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
* rdmsr_start and rdmsr_end, from being defined multiple times.
*/
-static noinline uint64_t test_em_rdmsr(uint32_t msr)
+static noinline u64 test_em_rdmsr(u32 msr)
{
- uint32_t a, d;
+ u32 a, d;
guest_exception_count = 0;
__asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
"=a"(a), "=d"(d) : "c"(msr) : "memory");
- return a | ((uint64_t) d << 32);
+ return a | ((u64)d << 32);
}
/*
* Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
* wrmsr_start and wrmsr_end, from being defined multiple times.
*/
-static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+static noinline void test_em_wrmsr(u32 msr, u64 value)
{
- uint32_t a = value;
- uint32_t d = value >> 32;
+ u32 a = value;
+ u32 d = value >> 32;
guest_exception_count = 0;
@@ -208,7 +208,7 @@ extern char em_wrmsr_start, em_wrmsr_end;
static void guest_code_filter_allow(void)
{
- uint64_t data;
+ u64 data;
/*
* Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
@@ -328,7 +328,7 @@ static void guest_code_filter_deny(void)
static void guest_code_permission_bitmap(void)
{
- uint64_t data;
+ u64 data;
data = test_rdmsr(MSR_FS_BASE);
GUEST_ASSERT(data == MSR_FS_BASE);
@@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void)
data = test_rdmsr(MSR_GS_BASE);
GUEST_ASSERT(data == MSR_GS_BASE);
+ /* Access the MSRs again to ensure KVM has disabled interception.*/
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
GUEST_DONE();
}
@@ -385,7 +391,7 @@ static void check_for_guest_assert(struct kvm_vcpu *vcpu)
}
}
-static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void process_rdmsr(struct kvm_vcpu *vcpu, u32 msr_index)
{
struct kvm_run *run = vcpu->run;
@@ -417,7 +423,7 @@ static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
}
}
-static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void process_wrmsr(struct kvm_vcpu *vcpu, u32 msr_index)
{
struct kvm_run *run = vcpu->run;
@@ -458,7 +464,7 @@ static void process_ucall_done(struct kvm_vcpu *vcpu)
uc.cmd, UCALL_DONE);
}
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+static u64 process_ucall(struct kvm_vcpu *vcpu)
{
struct ucall uc = {};
@@ -483,20 +489,20 @@ static uint64_t process_ucall(struct kvm_vcpu *vcpu)
}
static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
- uint32_t msr_index)
+ u32 msr_index)
{
vcpu_run(vcpu);
process_rdmsr(vcpu, msr_index);
}
static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
- uint32_t msr_index)
+ u32 msr_index)
{
vcpu_run(vcpu);
process_wrmsr(vcpu, msr_index);
}
-static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+static u64 run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
{
vcpu_run(vcpu);
return process_ucall(vcpu);
@@ -513,7 +519,7 @@ KVM_ONE_VCPU_TEST_SUITE(user_msr);
KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
{
struct kvm_vm *vm = vcpu->vm;
- uint64_t cmd;
+ u64 cmd;
int rc;
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
@@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
"Expected ucall state to be UCALL_SYNC.");
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
run_guest_then_process_ucall_done(vcpu);
}
@@ -724,7 +732,7 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm)
.flags = KVM_MSR_FILTER_READ,
.nmsrs = 1,
.base = 0,
- .bitmap = (uint8_t *)&deny_bits,
+ .bitmap = (u8 *)&deny_bits,
},
},
};
diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
index a81a24761aac..1720113eae79 100644
--- a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
@@ -38,7 +38,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
{
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- uint32_t control;
+ u32 control;
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
GUEST_ASSERT(load_vmcs(vmx_pages));
@@ -72,7 +72,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
int main(int argc, char *argv[])
{
unsigned long apic_access_addr = ~0ul;
- vm_vaddr_t vmx_pages_gva;
+ gva_t vmx_pages_gva;
unsigned long high_gpa;
struct vmx_pages *vmx;
bool done = false;
diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
new file mode 100644
index 000000000000..80a4fd1e5bbb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define GOOD_IPI_VECTOR 0xe0
+#define BAD_IPI_VECTOR 0xf0
+
+static volatile int good_ipis_received;
+
+static void good_ipi_handler(struct ex_regs *regs)
+{
+ good_ipis_received++;
+}
+
+static void bad_ipi_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored");
+}
+
+static void l2_guest_code(void)
+{
+ x2apic_enable();
+ vmcall();
+
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u32 control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ /* Modify APIC ID to coerce KVM into inhibiting APICv. */
+ xapic_enable();
+ xapic_write_reg(APIC_ID, 1 << 24);
+
+ /*
+ * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR
+ * but not SVI. APICv should be inhibited due to running with a
+ * modified APIC ID.
+ */
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24);
+
+ /* Enable IRQs and verify the IRQ was received. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 1);
+
+ /*
+ * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv,
+ * as KVM should force the APIC ID back to its default.
+ */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+ GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD);
+
+ /*
+ * Scribble the APIC access page to verify KVM disabled xAPIC
+ * virtualization in vmcs01, and to verify that KVM flushes L1's TLB
+ * when L2 switches back to accelerated xAPIC mode.
+ */
+ xapic_write_reg(APIC_ICR2, 0xdeadbeefu);
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR);
+
+ /*
+ * Verify the IRQ is still in-service and emit an EOI to verify KVM
+ * propagates the highest vISR vector to SVI when APICv is activated
+ * (and does so even if APICv was uninhibited while L2 was active).
+ */
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ x2apic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+
+ /*
+ * Run L2 one more time to switch back to xAPIC mode to verify that KVM
+ * handles the x2APIC => xAPIC transition and inhibits APICv while L2
+ * is active.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD));
+
+ xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR);
+ /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */
+ sti_nop();
+ GUEST_ASSERT_EQ(good_ipis_received, 2);
+
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)),
+ BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR)));
+ xapic_write_reg(APIC_EOI, 0);
+ GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ gva_t vmx_pages_gva;
+ struct vmx_pages *vmx;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler);
+ vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ }
+
+ /*
+ * Verify at least two IRQs were injected. Unfortunately, KVM counts
+ * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation),
+ * so being more precise isn't possible given the current stats.
+ */
+ TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2,
+ "Wanted at least 2 IRQ injections, got %lu\n",
+ vcpu_get_stat(vcpu, irq_injections));
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
deleted file mode 100644
index fa512d033205..000000000000
--- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * KVM dirty page logging test
- *
- * Copyright (C) 2018, Red Hat, Inc.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
-
-#include "test_util.h"
-#include "kvm_util.h"
-#include "processor.h"
-#include "vmx.h"
-
-/* The memory slot index to track dirty pages */
-#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_PAGES 3
-
-/* L1 guest test virtual memory offset */
-#define GUEST_TEST_MEM 0xc0000000
-
-/* L2 guest test virtual memory offset */
-#define NESTED_TEST_MEM1 0xc0001000
-#define NESTED_TEST_MEM2 0xc0002000
-
-static void l2_guest_code(u64 *a, u64 *b)
-{
- READ_ONCE(*a);
- WRITE_ONCE(*a, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- WRITE_ONCE(*b, 1);
- GUEST_SYNC(true);
- GUEST_SYNC(false);
-
- /* Exit to L1 and never come back. */
- vmcall();
-}
-
-static void l2_guest_code_ept_enabled(void)
-{
- l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2);
-}
-
-static void l2_guest_code_ept_disabled(void)
-{
- /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */
- l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM);
-}
-
-void l1_guest_code(struct vmx_pages *vmx)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- void *l2_rip;
-
- GUEST_ASSERT(vmx->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx));
- GUEST_ASSERT(load_vmcs(vmx));
-
- if (vmx->eptp_gpa)
- l2_rip = l2_guest_code_ept_enabled;
- else
- l2_rip = l2_guest_code_ept_disabled;
-
- prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(false);
- GUEST_ASSERT(!vmlaunch());
- GUEST_SYNC(false);
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_DONE();
-}
-
-static void test_vmx_dirty_log(bool enable_ept)
-{
- vm_vaddr_t vmx_pages_gva = 0;
- struct vmx_pages *vmx;
- unsigned long *bmap;
- uint64_t *host_test_mem;
-
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct ucall uc;
- bool done = false;
-
- pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled");
-
- /* Create VM */
- vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vcpu, 1, vmx_pages_gva);
-
- /* Add an extra memory slot for testing dirty logging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- GUEST_TEST_MEM,
- TEST_MEM_SLOT_INDEX,
- TEST_MEM_PAGES,
- KVM_MEM_LOG_DIRTY_PAGES);
-
- /*
- * Add an identity map for GVA range [0xc0000000, 0xc0002000). This
- * affects both L1 and L2. However...
- */
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
-
- /*
- * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
- * 0xc0000000.
- *
- * Note that prepare_eptp should be called only L1's GPA map is done,
- * meaning after the last call to virt_map.
- *
- * When EPT is disabled, the L2 guest code will still access the same L1
- * GPAs as the EPT enabled case.
- */
- if (enable_ept) {
- prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- }
-
- bmap = bitmap_zalloc(TEST_MEM_PAGES);
- host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-
- while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
-
- switch (get_ucall(vcpu, &uc)) {
- case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- /* NOT REACHED */
- case UCALL_SYNC:
- /*
- * The nested guest wrote at offset 0x1000 in the memslot, but the
- * dirty bitmap must be filled in according to L1 GPA, not L2.
- */
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
- if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
- } else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
- }
-
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
- break;
- case UCALL_DONE:
- done = true;
- break;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
- }
-}
-
-int main(int argc, char *argv[])
-{
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
-
- test_vmx_dirty_log(/*enable_ept=*/false);
-
- if (kvm_cpu_has_ept())
- test_vmx_dirty_log(/*enable_ept=*/true);
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..2cae86d9d5e2 100644
--- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
@@ -110,7 +110,7 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
TEST_REQUIRE(host_cpu_is_intel);
- TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+ TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled());
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
index a100ee5f0009..a2eaceed9ad5 100644
--- a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
@@ -52,7 +52,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva;
+ gva_t vmx_pages_gva;
struct kvm_sregs sregs;
struct kvm_vcpu *vcpu;
struct kvm_run *run;
diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
index 90720b6205f4..c1e8632a1bb6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
@@ -12,11 +12,10 @@
#include "kvm_util.h"
#include "vmx.h"
-static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
- uint64_t mask)
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask)
{
- uint64_t val = vcpu_get_msr(vcpu, msr_index);
- uint64_t bit;
+ u64 val = vcpu_get_msr(vcpu, msr_index);
+ u64 bit;
mask &= val;
@@ -26,11 +25,10 @@ static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
}
}
-static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
- uint64_t mask)
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask)
{
- uint64_t val = vcpu_get_msr(vcpu, msr_index);
- uint64_t bit;
+ u64 val = vcpu_get_msr(vcpu, msr_index);
+ u64 bit;
mask = ~mask | val;
@@ -40,7 +38,7 @@ static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
}
}
-static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index)
{
vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
@@ -68,10 +66,10 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
}
static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
- uint64_t msr_bit,
+ u64 msr_bit,
struct kvm_x86_cpu_feature feature)
{
- uint64_t val;
+ u64 val;
vcpu_clear_cpuid_feature(vcpu, feature);
@@ -90,7 +88,7 @@ static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
{
- uint64_t supported_bits = FEAT_CTL_LOCKED |
+ u64 supported_bits = FEAT_CTL_LOCKED |
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
FEAT_CTL_SGX_LC_ENABLED |
diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
new file mode 100644
index 000000000000..f13dee317383
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ *
+ * Test KVM's ability to save and restore nested state when the L1 guest
+ * is using 5-level paging and the L2 guest is using 4-level paging.
+ *
+ * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86:
+ * model canonical checks more precisely").
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define LA57_GS_BASE 0xff2bc0311fb00000ull
+
+static void l2_guest_code(void)
+{
+ /*
+ * Sync with L0 to trigger save/restore. After
+ * resuming, execute VMCALL to exit back to L1.
+ */
+ GUEST_SYNC(1);
+ vmcall();
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 guest_cr4;
+ gpa_t pml5_pa, pml4_pa;
+ u64 *pml5;
+ u64 exit_reason;
+
+ /* Set GS_BASE to a value that is only canonical with LA57. */
+ wrmsr(MSR_GS_BASE, LA57_GS_BASE);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE);
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Set up L2 with a 4-level page table by pointing its CR3 to
+ * L1's first PML4 table and clearing CR4.LA57. This creates
+ * the CR4.LA57 mismatch that exercises the bug.
+ */
+ pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK;
+ pml5 = (u64 *)pml5_pa;
+ pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK;
+ vmwrite(GUEST_CR3, pml4_pa);
+
+ guest_cr4 = vmreadz(GUEST_CR4);
+ guest_cr4 &= ~X86_CR4_LA57;
+ vmwrite(GUEST_CR4, guest_cr4);
+
+ GUEST_ASSERT(!vmlaunch());
+
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ l1_guest_code(vmx_pages);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ gva_t vmx_pages_gva = 0;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /*
+ * L1 needs to read its own PML5 table to set up L2. Identity map
+ * the PML5 table to facilitate this.
+ */
+ virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(uc.args[1] == stage,
+ "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]);
+ if (stage == 1) {
+ pr_info("L2 is active; performing save/restore.\n");
+ state = vcpu_save_state(vcpu);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..d004108dbdc6 100644
--- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
@@ -29,7 +29,7 @@ static union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
- u64 anythread_deprecated:1;
+ u64 pebs_timing_info:1;
};
u64 capabilities;
} host_cap;
@@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = {
.pebs_arch_reg = 1,
.pebs_format = -1,
.pebs_baseline = 1,
+ .pebs_timing_info = 1,
};
static const union perf_capabilities format_caps = {
@@ -51,16 +52,16 @@ static const union perf_capabilities format_caps = {
.pebs_format = -1,
};
-static void guest_test_perf_capabilities_gp(uint64_t val)
+static void guest_test_perf_capabilities_gp(u64 val)
{
- uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+ u8 vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP for value '0x%lx', got vector '0x%x'",
- val, vector);
+ "Expected #GP for value '0x%lx', got %s",
+ val, ex_str(vector));
}
-static void guest_code(uint64_t current_val)
+static void guest_code(u64 current_val)
{
int i;
@@ -128,7 +129,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
{
- const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ const u64 fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
int bit;
for_each_set_bit(bit, &fungible_caps, 64) {
@@ -147,7 +148,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
*/
KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
{
- const uint64_t reserved_caps = (~host_cap.capabilities |
+ const u64 reserved_caps = (~host_cap.capabilities |
immutable_caps.capabilities) &
~format_caps.capabilities;
union perf_capabilities val = host_cap;
@@ -209,7 +210,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code)
{
- uint64_t val;
+ u64 val;
int i, r;
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
index 00dd2ac07a61..1b7b6ba23de7 100644
--- a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
@@ -152,7 +152,7 @@ void guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ gva_t vmx_pages_gva = 0;
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..39ce9a9369f5 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -48,20 +48,20 @@
* Incremented in the IPI handler. Provides evidence to the sender that the IPI
* arrived at the destination
*/
-static volatile uint64_t ipis_rcvd;
+static volatile u64 ipis_rcvd;
/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
- uint32_t halter_apic_id;
- volatile uint64_t hlt_count;
- volatile uint64_t wake_count;
- uint64_t ipis_sent;
- uint64_t migrations_attempted;
- uint64_t migrations_completed;
- uint32_t icr;
- uint32_t icr2;
- uint32_t halter_tpr;
- uint32_t halter_ppr;
+ u32 halter_apic_id;
+ volatile u64 hlt_count;
+ volatile u64 wake_count;
+ u64 ipis_sent;
+ u64 migrations_attempted;
+ u64 migrations_completed;
+ u32 icr;
+ u32 icr2;
+ u32 halter_tpr;
+ u32 halter_ppr;
/*
* Record local version register as a cross-check that APIC access
@@ -69,19 +69,19 @@ struct test_data_page {
* arch/x86/kvm/lapic.c). If test is failing, check that values match
* to determine whether APIC access exits are working.
*/
- uint32_t halter_lvr;
+ u32 halter_lvr;
};
struct thread_params {
struct test_data_page *data;
struct kvm_vcpu *vcpu;
- uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+ u64 *pipis_rcvd; /* host address of ipis_rcvd global */
};
void verify_apic_base_addr(void)
{
- uint64_t msr = rdmsr(MSR_IA32_APICBASE);
- uint64_t base = GET_APIC_BASE(msr);
+ u64 msr = rdmsr(MSR_IA32_APICBASE);
+ u64 base = GET_APIC_BASE(msr);
GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -124,12 +125,12 @@ static void guest_ipi_handler(struct ex_regs *regs)
static void sender_guest_code(struct test_data_page *data)
{
- uint64_t last_wake_count;
- uint64_t last_hlt_count;
- uint64_t last_ipis_rcvd_count;
- uint32_t icr_val;
- uint32_t icr2_val;
- uint64_t tsc_start;
+ u64 last_wake_count;
+ u64 last_hlt_count;
+ u64 last_ipis_rcvd_count;
+ u32 icr_val;
+ u32 icr2_val;
+ u64 tsc_start;
verify_apic_base_addr();
xapic_enable();
@@ -247,7 +248,7 @@ static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
}
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
- uint64_t *pipis_rcvd)
+ u64 *pipis_rcvd)
{
long pages_not_moved;
unsigned long nodemask = 0;
@@ -255,20 +256,19 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
- int i, r;
+ int i;
int from, to;
unsigned long bit;
- uint64_t hlt_count;
- uint64_t wake_count;
- uint64_t ipis_sent;
+ u64 hlt_count;
+ u64 wake_count;
+ u64 ipis_sent;
fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
delay_usecs);
/* Get set of first 64 numa nodes available */
- r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
- TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
@@ -393,12 +393,12 @@ int main(int argc, char *argv[])
int run_secs = 0;
int delay_usecs = 0;
struct test_data_page *data;
- vm_vaddr_t test_data_page_vaddr;
+ gva_t test_data_page_gva;
bool migrate = false;
pthread_t threads[2];
struct thread_params params[2];
struct kvm_vm *vm;
- uint64_t *pipis_rcvd;
+ u64 *pipis_rcvd;
get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
if (run_secs <= 0)
@@ -414,16 +414,16 @@ int main(int argc, char *argv[])
params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
- test_data_page_vaddr = vm_vaddr_alloc_page(vm);
- data = addr_gva2hva(vm, test_data_page_vaddr);
+ test_data_page_gva = vm_alloc_page(vm);
+ data = addr_gva2hva(vm, test_data_page_gva);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
- vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
- vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_gva);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_gva);
- pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+ pipis_rcvd = (u64 *)addr_gva2hva(vm, (u64)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
@@ -465,6 +465,19 @@ int main(int argc, char *argv[])
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..637bb90c1d93 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,12 +18,12 @@ struct xapic_vcpu {
static void xapic_guest_code(void)
{
- asm volatile("cli");
+ cli();
xapic_enable();
while (1) {
- uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+ u64 val = (u64)xapic_read_reg(APIC_IRR) |
(u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
xapic_write_reg(APIC_ICR2, val >> 32);
@@ -38,12 +38,12 @@ static void xapic_guest_code(void)
static void x2apic_guest_code(void)
{
- asm volatile("cli");
+ cli();
x2apic_enable();
do {
- uint64_t val = x2apic_read_reg(APIC_IRR) |
+ u64 val = x2apic_read_reg(APIC_IRR) |
x2apic_read_reg(APIC_IRR + 0x10) << 32;
if (val & X2APIC_RSVD_BITS_MASK) {
@@ -56,12 +56,12 @@ static void x2apic_guest_code(void)
} while (1);
}
-static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+static void ____test_icr(struct xapic_vcpu *x, u64 val)
{
struct kvm_vcpu *vcpu = x->vcpu;
struct kvm_lapic_state xapic;
struct ucall uc;
- uint64_t icr;
+ u64 icr;
/*
* Tell the guest what ICR value to write. Use the IRR to pass info,
@@ -93,7 +93,7 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
}
-static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+static void __test_icr(struct xapic_vcpu *x, u64 val)
{
/*
* The BUSY bit is reserved on both AMD and Intel, but only AMD treats
@@ -109,7 +109,7 @@ static void __test_icr(struct xapic_vcpu *x, uint64_t val)
static void test_icr(struct xapic_vcpu *x)
{
struct kvm_vcpu *vcpu = x->vcpu;
- uint64_t icr, i, j;
+ u64 icr, i, j;
icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
for (i = 0; i <= 0xff; i++)
@@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x)
__test_icr(x, icr | i);
/*
- * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
- * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use
+ * vector 0xff.
*/
icr = APIC_INT_ASSERT | 0xff;
for (i = 0; i < 0xff; i++) {
@@ -142,9 +142,9 @@ static void test_icr(struct xapic_vcpu *x)
__test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
}
-static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base)
{
- uint32_t apic_id, expected;
+ u32 apic_id, expected;
struct kvm_lapic_state xapic;
vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
@@ -170,9 +170,9 @@ static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
*/
static void test_apic_id(void)
{
- const uint32_t NR_VCPUS = 3;
+ const u32 NR_VCPUS = 3;
struct kvm_vcpu *vcpus[NR_VCPUS];
- uint64_t apic_base;
+ u64 apic_base;
struct kvm_vm *vm;
int i;
@@ -248,7 +248,7 @@ int main(int argc, char *argv[])
* drops writes, AMD does not). Account for the errata when checking
* that KVM reads back what was written.
*/
- x.has_xavic_errata = host_cpu_is_amd &&
+ x.has_xavic_errata = host_cpu_is_amd_compatible &&
get_kvm_amd_param_bool("avic");
vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
new file mode 100644
index 000000000000..ab25db2235d5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+static bool is_x2apic;
+
+#define IRQ_VECTOR 0x20
+
+/* See also the comment at similar assertion in memslot_perf_test.c */
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static atomic_uint tpr_guest_irq_sync_val;
+
+static void tpr_guest_irq_sync_flag_reset(void)
+{
+ atomic_store_explicit(&tpr_guest_irq_sync_val, 0,
+ memory_order_release);
+}
+
+static unsigned int tpr_guest_irq_sync_val_get(void)
+{
+ return atomic_load_explicit(&tpr_guest_irq_sync_val,
+ memory_order_acquire);
+}
+
+static void tpr_guest_irq_sync_val_inc(void)
+{
+ atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1,
+ memory_order_acq_rel);
+}
+
+static void tpr_guest_irq_handler_xapic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs)
+{
+ tpr_guest_irq_sync_val_inc();
+
+ x2apic_write_reg(APIC_EOI, 0);
+}
+
+static void tpr_guest_irq_queue(void)
+{
+ if (is_x2apic) {
+ x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR);
+ } else {
+ u32 icr, icr2;
+
+ icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED |
+ IRQ_VECTOR;
+ icr2 = 0;
+
+ xapic_write_reg(APIC_ICR2, icr2);
+ xapic_write_reg(APIC_ICR, icr);
+ }
+}
+
+static u8 tpr_guest_tpr_get(void)
+{
+ u32 taskpri;
+
+ if (is_x2apic)
+ taskpri = x2apic_read_reg(APIC_TASKPRI);
+ else
+ taskpri = xapic_read_reg(APIC_TASKPRI);
+
+ return GET_APIC_PRI(taskpri);
+}
+
+static u8 tpr_guest_ppr_get(void)
+{
+ u32 procpri;
+
+ if (is_x2apic)
+ procpri = x2apic_read_reg(APIC_PROCPRI);
+ else
+ procpri = xapic_read_reg(APIC_PROCPRI);
+
+ return GET_APIC_PRI(procpri);
+}
+
+static u8 tpr_guest_cr8_get(void)
+{
+ u64 cr8;
+
+ asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8));
+
+ return cr8 & GENMASK(3, 0);
+}
+
+static void tpr_guest_check_tpr_ppr_cr8_equal(void)
+{
+ u8 tpr;
+
+ tpr = tpr_guest_tpr_get();
+
+ GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr);
+ GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr);
+}
+
+static void tpr_guest_code(void)
+{
+ cli();
+
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+
+ GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* TPR = 0 but IRQ masked by IF=0, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0);
+
+ sti();
+
+ /* IF=1 now, IRQ should fire */
+ while (tpr_guest_irq_sync_val_get() == 0)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ GUEST_SYNC(true);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ tpr_guest_irq_queue();
+
+ /* IRQ masked by barely high enough TPR now, should not fire */
+ udelay(1000);
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1);
+
+ GUEST_SYNC(false);
+ tpr_guest_check_tpr_ppr_cr8_equal();
+
+ /* TPR barely low enough now to unmask IRQ, should fire */
+ while (tpr_guest_irq_sync_val_get() == 1)
+ cpu_relax();
+ GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2);
+
+ GUEST_DONE();
+}
+
+static u8 lapic_tpr_get(struct kvm_lapic_state *xapic)
+{
+ return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI]));
+}
+
+static void lapic_tpr_set(struct kvm_lapic_state *xapic, u8 val)
+{
+ u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI];
+
+ *taskpri = SET_APIC_PRI(*taskpri, val);
+}
+
+static u8 sregs_tpr(struct kvm_sregs *sregs)
+{
+ return sregs->cr8 & GENMASK(3, 0);
+}
+
+static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic_state xapic;
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0);
+}
+
+static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sregs sregs;
+ struct kvm_lapic_state xapic;
+
+ vcpu_sregs_get(vcpu, &sregs);
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic));
+}
+
+static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask)
+{
+ struct kvm_lapic_state xapic;
+ u8 tpr;
+
+ static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number");
+ tpr = IRQ_VECTOR / 16;
+ if (!mask)
+ tpr--;
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ lapic_tpr_set(&xapic, tpr);
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+}
+
+static void test_tpr(bool __is_x2apic)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool done = false;
+
+ is_x2apic = __is_x2apic;
+
+ vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code);
+ if (is_x2apic) {
+ vm_install_exception_handler(vm, IRQ_VECTOR,
+ tpr_guest_irq_handler_x2apic);
+ } else {
+ vm_install_exception_handler(vm, IRQ_VECTOR,
+ tpr_guest_irq_handler_xapic);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ }
+
+ sync_global_to_guest(vcpu->vm, is_x2apic);
+
+ /* According to the SDM/APM the TPR value at reset is 0 */
+ test_tpr_check_tpr_zero(vcpu);
+ test_tpr_check_tpr_cr8_equal(vcpu);
+
+ tpr_guest_irq_sync_flag_reset();
+ sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val);
+
+ while (!done) {
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ test_tpr_check_tpr_cr8_equal(vcpu);
+ done = true;
+ break;
+ case UCALL_SYNC:
+ test_tpr_check_tpr_cr8_equal(vcpu);
+ test_tpr_set_tpr_for_irq(vcpu, uc.args[1]);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd);
+ break;
+ }
+ }
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can
+ * be fully hidden from the guest. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ test_tpr(false);
+ test_tpr(true);
+}
diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..40dc9e6b3fad 100644
--- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
@@ -21,7 +21,7 @@
*/
#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
do { \
- uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ u64 __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
\
__GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
__supported == ((xfeatures) | (dependencies)), \
@@ -39,7 +39,7 @@ do { \
*/
#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
do { \
- uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ u64 __supported = (supported_xcr0) & (xfeatures); \
\
__GUEST_ASSERT(!__supported || __supported == (xfeatures), \
"supported = 0x%lx, xfeatures = 0x%llx", \
@@ -48,8 +48,8 @@ do { \
static void guest_code(void)
{
- uint64_t initial_xcr0;
- uint64_t supported_xcr0;
+ u64 initial_xcr0;
+ u64 supported_xcr0;
int i, vector;
set_cr4(get_cr4() | X86_CR4_OSXSAVE);
@@ -81,13 +81,13 @@ static void guest_code(void)
vector = xsetbv_safe(0, XFEATURE_MASK_FP);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(FP), got vector '0x%x'",
- vector);
+ "Expected success on XSETBV(FP), got %s",
+ ex_str(vector));
vector = xsetbv_safe(0, supported_xcr0);
__GUEST_ASSERT(!vector,
- "Expected success on XSETBV(0x%lx), got vector '0x%x'",
- supported_xcr0, vector);
+ "Expected success on XSETBV(0x%lx), got %s",
+ supported_xcr0, ex_str(vector));
for (i = 0; i < 64; i++) {
if (supported_xcr0 & BIT_ULL(i))
@@ -95,8 +95,8 @@ static void guest_code(void)
vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
__GUEST_ASSERT(vector == GP_VECTOR,
- "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
- BIT_ULL(i), supported_xcr0, vector);
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s",
+ BIT_ULL(i), supported_xcr0, ex_str(vector));
}
GUEST_DONE();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..5076f6a75455 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -116,15 +116,15 @@ struct pvclock_wall_clock {
} __attribute__((__packed__));
struct vcpu_runstate_info {
- uint32_t state;
- uint64_t state_entry_time;
- uint64_t time[5]; /* Extra field for overrun check */
+ u32 state;
+ u64 state_entry_time;
+ u64 time[5]; /* Extra field for overrun check */
};
struct compat_vcpu_runstate_info {
- uint32_t state;
- uint64_t state_entry_time;
- uint64_t time[5];
+ u32 state;
+ u64 state_entry_time;
+ u64 time[5];
} __attribute__((__packed__));
struct arch_vcpu_info {
@@ -133,8 +133,8 @@ struct arch_vcpu_info {
};
struct vcpu_info {
- uint8_t evtchn_upcall_pending;
- uint8_t evtchn_upcall_mask;
+ u8 evtchn_upcall_pending;
+ u8 evtchn_upcall_mask;
unsigned long evtchn_pending_sel;
struct arch_vcpu_info arch;
struct pvclock_vcpu_time_info time;
@@ -145,7 +145,7 @@ struct shared_info {
unsigned long evtchn_pending[64];
unsigned long evtchn_mask[64];
struct pvclock_wall_clock wc;
- uint32_t wc_sec_hi;
+ u32 wc_sec_hi;
/* arch_shared_info here */
};
@@ -191,10 +191,7 @@ static void guest_code(void)
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
/* Trigger an interrupt injection */
GUEST_SYNC(TEST_INJECT_VECTOR);
@@ -550,15 +547,9 @@ int main(int argc, char *argv[])
int irq_fd[2] = { -1, -1 };
if (do_eventfd_tests) {
- irq_fd[0] = eventfd(0, 0);
- irq_fd[1] = eventfd(0, 0);
+ irq_fd[0] = kvm_new_eventfd();
+ irq_fd[1] = kvm_new_eventfd();
- /* Unexpected, but not a KVM failure */
- if (irq_fd[0] == -1 || irq_fd[1] == -1)
- do_evtchn_tests = do_eventfd_tests = false;
- }
-
- if (do_eventfd_tests) {
irq_routes.info.nr = 2;
irq_routes.entries[0].gsi = 32;
@@ -575,15 +566,8 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
- struct kvm_irqfd ifd = { };
-
- ifd.fd = irq_fd[0];
- ifd.gsi = 32;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
-
- ifd.fd = irq_fd[1];
- ifd.gsi = 33;
- vm_ioctl(vm, KVM_IRQFD, &ifd);
+ kvm_assign_irqfd(vm, 32, irq_fd[0]);
+ kvm_assign_irqfd(vm, 33, irq_fd[1]);
struct sigaction sa = { };
sa.sa_handler = handle_alrm;
@@ -674,7 +658,7 @@ int main(int argc, char *argv[])
printf("Testing RUNSTATE_ADJUST\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
memset(&rst.u, 0, sizeof(rst.u));
- rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.state = (u64)-1;
rst.u.runstate.time_blocked =
0x5a - rs->time[RUNSTATE_blocked];
rst.u.runstate.time_offline =
@@ -1129,7 +1113,7 @@ int main(int argc, char *argv[])
/* Don't change the address, just trigger a write */
struct kvm_xen_vcpu_attr adj = {
.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
- .u.runstate.state = (uint64_t)-1
+ .u.runstate.state = (u64)-1
};
vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
index f331a4e9bae3..12c63df6bbce 100644
--- a/tools/testing/selftests/kvm/x86/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c
@@ -17,7 +17,7 @@ int main(int argc, char *argv[])
bool xss_in_msr_list;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
- uint64_t xss_val;
+ u64 xss_val;
int i, r;
/* Create VM */