diff options
Diffstat (limited to 'tools/testing/selftests/kvm')
225 files changed, 15417 insertions, 5066 deletions
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 20af35a91d6f..f2b223072b62 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -3,10 +3,10 @@ top_srcdir = ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64)) +ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch)) # Top-level selftests allows ARCH=x86_64 :-( ifeq ($(ARCH),x86_64) - ARCH := x86 + override ARCH := x86 endif include Makefile.kvm else diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 4277b983cace..9118a5a51b89 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -8,6 +8,7 @@ LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c +LIBKVM += lib/lru_gen_util.c LIBKVM += lib/memstress.c LIBKVM += lib/guest_sprintf.c LIBKVM += lib/rbtree.c @@ -47,15 +48,34 @@ LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c LIBKVM_riscv += lib/riscv/ucall.c +LIBKVM_loongarch += lib/loongarch/processor.c +LIBKVM_loongarch += lib/loongarch/ucall.c +LIBKVM_loongarch += lib/loongarch/exception.S + # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test +TEST_GEN_PROGS_COMMON += memslot_modification_stress_test +TEST_GEN_PROGS_COMMON += memslot_perf_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test +TEST_GEN_PROGS_x86 += x86/evmcs_smm_controls_test TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test +TEST_GEN_PROGS_x86 += x86/fastops_test TEST_GEN_PROGS_x86 += x86/fix_hypercall_test TEST_GEN_PROGS_x86 += x86/hwcr_msr_test TEST_GEN_PROGS_x86 += x86/hyperv_clock @@ -68,8 +88,18 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush TEST_GEN_PROGS_x86 += x86/kvm_clock_test TEST_GEN_PROGS_x86 += x86/kvm_pv_test +TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test +TEST_GEN_PROGS_x86 += x86/msrs_test +TEST_GEN_PROGS_x86 += x86/nested_close_kvm_test +TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test +TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test +TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test +TEST_GEN_PROGS_x86 += x86/nested_set_state_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/nested_vmsave_vmload_test TEST_GEN_PROGS_x86 += x86/platform_info_test TEST_GEN_PROGS_x86 += x86/pmu_counters_test TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test @@ -83,25 +113,26 @@ TEST_GEN_PROGS_x86 += x86/state_test TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test TEST_GEN_PROGS_x86 += x86/svm_vmcall_test TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test +TEST_GEN_PROGS_x86 += x86/svm_nested_clear_efer_svme TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test +TEST_GEN_PROGS_x86 += x86/svm_nested_vmcb12_gpa +TEST_GEN_PROGS_x86 += x86/svm_lbr_nested_state TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync TEST_GEN_PROGS_x86 += x86/sync_regs_test TEST_GEN_PROGS_x86 += x86/ucna_injection_test TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test -TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test -TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/vmx_apicv_updates_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test -TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/vmx_nested_la57_state_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test TEST_GEN_PROGS_x86 += x86/xapic_state_test +TEST_GEN_PROGS_x86 += x86/xapic_tpr_test TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test TEST_GEN_PROGS_x86 += x86/xss_msr_test TEST_GEN_PROGS_x86 += x86/debug_regs @@ -116,63 +147,56 @@ TEST_GEN_PROGS_x86 += x86/amx_test TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test -TEST_GEN_PROGS_x86 += memslot_modification_stress_test -TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases +TEST_GEN_PROGS_arm64 += arm64/at TEST_GEN_PROGS_arm64 += arm64/debug-exceptions +TEST_GEN_PROGS_arm64 += arm64/hello_el2 +TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test +TEST_GEN_PROGS_arm64 += arm64/sea_to_user TEST_GEN_PROGS_arm64 += arm64/set_id_regs TEST_GEN_PROGS_arm64 += arm64/smccc_filter TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config TEST_GEN_PROGS_arm64 += arm64/vgic_init TEST_GEN_PROGS_arm64 += arm64/vgic_irq TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress +TEST_GEN_PROGS_arm64 += arm64/vgic_v5 TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access -TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/no-vgic +TEST_GEN_PROGS_arm64 += arm64/idreg-idst +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test -TEST_GEN_PROGS_arm64 += memslot_modification_stress_test -TEST_GEN_PROGS_arm64 += memslot_perf_test +TEST_GEN_PROGS_arm64 += guest_memfd_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -181,29 +205,40 @@ TEST_GEN_PROGS_s390 += s390/debug_test TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test +TEST_GEN_PROGS_s390 += s390/user_operexec +TEST_GEN_PROGS_s390 += s390/keyop TEST_GEN_PROGS_s390 += rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_s390 += s390/irq_routing +TEST_GEN_PROGS_s390 += mmu_stress_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test +TEST_GEN_PROGS_riscv += access_tracking_perf_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += dirty_log_perf_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test +TEST_GEN_PROGS_riscv += mmu_stress_test +TEST_GEN_PROGS_riscv += rseq_test TEST_GEN_PROGS_riscv += steal_time +TEST_GEN_PROGS_loongarch = loongarch/pmu_test +TEST_GEN_PROGS_loongarch += arch_timer +TEST_GEN_PROGS_loongarch += coalesced_io_test +TEST_GEN_PROGS_loongarch += demand_paging_test +TEST_GEN_PROGS_loongarch += dirty_log_perf_test +TEST_GEN_PROGS_loongarch += dirty_log_test +TEST_GEN_PROGS_loongarch += guest_print_test +TEST_GEN_PROGS_loongarch += hardware_disable_test +TEST_GEN_PROGS_loongarch += kvm_binary_stats_test +TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus +TEST_GEN_PROGS_loongarch += kvm_page_table_test +TEST_GEN_PROGS_loongarch += memslot_modification_stress_test +TEST_GEN_PROGS_loongarch += memslot_perf_test +TEST_GEN_PROGS_loongarch += set_memory_region_test +TEST_GEN_PROGS_loongarch += steal_time + SPLIT_TESTS += arch_timer SPLIT_TESTS += get-reg-list @@ -218,6 +253,7 @@ OVERRIDE_TARGETS = 1 # importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, # which causes the environment variable to override the makefile). include ../lib.mk +include ../cgroup/lib/libcgroup.mk INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ @@ -225,6 +261,7 @@ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ + -U_FORTIFY_SOURCE \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -fno-strict-aliasing \ @@ -271,7 +308,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O) SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS)) diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 3c7defd34f56..4415c94b2866 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -7,9 +7,11 @@ * This test measures the performance effects of KVM's access tracking. * Access tracking is driven by the MMU notifiers test_young, clear_young, and * clear_flush_young. These notifiers do not have a direct userspace API, - * however the clear_young notifier can be triggered by marking a pages as idle - * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to - * enable access tracking on guest memory. + * however the clear_young notifier can be triggered either by + * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR + * 2. adding a new MGLRU generation using the lru_gen debugfs file. + * This test leverages page_idle to enable access tracking on guest memory + * unless MGLRU is enabled, in which case MGLRU is used. * * To measure performance this test runs a VM with a configurable number of * vCPUs that each touch every page in disjoint regions of memory. Performance @@ -17,10 +19,11 @@ * predefined region. * * Note that a deterministic correctness test of access tracking is not possible - * by using page_idle as it exists today. This is for a few reasons: + * by using page_idle or MGLRU aging as it exists today. This is for a few + * reasons: * - * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This - * means subsequent guest accesses are not guaranteed to see page table + * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush. + * This means subsequent guest accesses are not guaranteed to see page table * updates made by KVM until some time in the future. * * 2. page_idle only operates on LRU pages. Newly allocated pages are not @@ -38,19 +41,28 @@ #include <inttypes.h> #include <limits.h> #include <pthread.h> -#include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include "kvm_syscalls.h" #include "kvm_util.h" #include "test_util.h" #include "memstress.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" + +#include "cgroup_util.h" +#include "lru_gen_util.h" + +static const char *TEST_MEMCG_NAME = "access_tracking_perf_test"; /* Global variable used to synchronize all of the vCPU threads. */ static int iteration; +/* The cgroup memory controller root. Needed for lru_gen-based aging. */ +char cgroup_root[PATH_MAX]; + /* Defines what vCPU threads should do during a given iteration. */ static enum { /* Run the vCPU to access all its memory. */ @@ -65,20 +77,39 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; /* Whether to overlap the regions of memory vCPUs access. */ static bool overlap_memory_access; +/* + * If the test should only warn if there are too many idle pages (i.e., it is + * expected). + * -1: Not yet set. + * 0: We do not expect too many idle pages, so FAIL if too many idle pages. + * 1: Having too many idle pages is expected, so merely print a warning if + * too many idle pages are found. + */ +static int idle_pages_warn_only = -1; + +/* Whether or not to use MGLRU instead of page_idle for access tracking */ +static bool use_lru_gen; + +/* Total number of pages to expect in the memcg after touching everything */ +static long test_pages; + +/* Last generation we found the pages in */ +static int lru_gen_last_gen = -1; + struct test_params { /* The backing source for the region of memory. */ enum vm_mem_backing_src_type backing_src; /* The amount of memory to allocate for each vCPU. */ - uint64_t vcpu_memory_bytes; + u64 vcpu_memory_bytes; /* The number of vCPUs to create in the VM. */ int nr_vcpus; }; -static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +static u64 pread_u64(int fd, const char *filename, u64 index) { - uint64_t value; + u64 value; off_t offset = index * sizeof(value); TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), @@ -92,13 +123,13 @@ static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) #define PAGEMAP_PRESENT (1ULL << 63) #define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) -static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +static u64 lookup_pfn(int pagemap_fd, struct kvm_vm *vm, gva_t gva) { - uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); - uint64_t entry; - uint64_t pfn; + u64 hva = (u64)addr_gva2hva(vm, gva); + u64 entry; + u64 pfn; - entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + entry = pread_u64(pagemap_fd, "pagemap", hva / getpagesize()); if (!(entry & PAGEMAP_PRESENT)) return 0; @@ -108,30 +139,46 @@ static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) return pfn; } -static bool is_page_idle(int page_idle_fd, uint64_t pfn) +static bool is_page_idle(int page_idle_fd, u64 pfn) { - uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + u64 bits = pread_u64(page_idle_fd, "page_idle", pfn / 64); return !!((bits >> (pfn % 64)) & 1); } -static void mark_page_idle(int page_idle_fd, uint64_t pfn) +static void mark_page_idle(int page_idle_fd, u64 pfn) { - uint64_t bits = 1ULL << (pfn % 64); + u64 bits = 1ULL << (pfn % 64); TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, "Set page_idle bits for PFN 0x%" PRIx64, pfn); } -static void mark_vcpu_memory_idle(struct kvm_vm *vm, - struct memstress_vcpu_args *vcpu_args) +static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx) +{ + char prefix[18] = {}; + + if (vcpu_idx >= 0) + snprintf(prefix, 18, "vCPU%d: ", vcpu_idx); + + TEST_ASSERT(idle_pages_warn_only, + "%sToo many pages still idle (%lu out of %lu)", + prefix, idle_pages, total_pages); + + printf("WARNING: %sToo many pages still idle (%lu out of %lu), " + "this will affect performance results.\n", + prefix, idle_pages, total_pages); +} + +static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm, + struct memstress_vcpu_args *vcpu_args) { int vcpu_idx = vcpu_args->vcpu_idx; - uint64_t base_gva = vcpu_args->gva; - uint64_t pages = vcpu_args->pages; - uint64_t page; - uint64_t still_idle = 0; - uint64_t no_pfn = 0; + gva_t base_gva = vcpu_args->gva; + u64 pages = vcpu_args->pages; + u64 page; + u64 still_idle = 0; + u64 no_pfn = 0; int page_idle_fd; int pagemap_fd; @@ -146,8 +193,8 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); for (page = 0; page < pages; page++) { - uint64_t gva = base_gva + page * memstress_args.guest_page_size; - uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + gva_t gva = base_gva + page * memstress_args.guest_page_size; + u64 pfn = lookup_pfn(pagemap_fd, vm, gva); if (!pfn) { no_pfn++; @@ -177,31 +224,83 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, * arbitrary; high enough that we ensure most memory access went through * access tracking but low enough as to not make the test too brittle * over time and across architectures. - * - * When running the guest as a nested VM, "warn" instead of asserting - * as the TLB size is effectively unlimited and the KVM doesn't - * explicitly flush the TLB when aging SPTEs. As a result, more pages - * are cached and the guest won't see the "idle" bit cleared. */ - if (still_idle >= pages / 10) { -#ifdef __x86_64__ - TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR), - "vCPU%d: Too many pages still idle (%lu out of %lu)", - vcpu_idx, still_idle, pages); -#endif - printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), " - "this will affect performance results.\n", - vcpu_idx, still_idle, pages); - } + if (still_idle >= pages / 10) + too_many_idle_pages(still_idle, pages, + overlap_memory_access ? -1 : vcpu_idx); close(page_idle_fd); close(pagemap_fd); } -static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) +int find_generation(struct memcg_stats *stats, long total_pages) +{ + /* + * For finding the generation that contains our pages, use the same + * 90% threshold that page_idle uses. + */ + int gen = lru_gen_find_generation(stats, total_pages * 9 / 10); + + if (gen >= 0) + return gen; + + if (!idle_pages_warn_only) { + TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).", + total_pages * 9 / 10); + return gen; + } + + /* + * We couldn't find a generation with 90% of guest memory, which can + * happen if access tracking is unreliable. Simply look for a majority + * of pages. + */ + puts("WARNING: Couldn't find a generation with 90% of guest memory. " + "Performance results may not be accurate."); + gen = lru_gen_find_generation(stats, total_pages / 2); + TEST_ASSERT(gen >= 0, + "Could not find a generation with 50%% of guest memory (%ld pages).", + total_pages / 2); + return gen; +} + +static void lru_gen_mark_memory_idle(struct kvm_vm *vm) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + struct memcg_stats stats; + int new_gen; + + /* Make a new generation */ + clock_gettime(CLOCK_MONOTONIC, &ts_start); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + ts_elapsed = timespec_elapsed(ts_start); + + /* Check the generation again */ + new_gen = find_generation(&stats, test_pages); + + /* + * This function should only be invoked with newly-accessed pages, + * so pages should always move to a newer generation. + */ + if (new_gen <= lru_gen_last_gen) { + /* We did not move to a newer generation. */ + long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen, + &stats); + + too_many_idle_pages(min_t(long, idle_pages, test_pages), + test_pages, -1); + } + pr_info("%-30s: %ld.%09lds\n", + "Mark memory idle (lru_gen)", ts_elapsed.tv_sec, + ts_elapsed.tv_nsec); + lru_gen_last_gen = new_gen; +} + +static void assert_ucall(struct kvm_vcpu *vcpu, u64 expected_ucall) { struct ucall uc; - uint64_t actual_ucall = get_ucall(vcpu, &uc); + u64 actual_ucall = get_ucall(vcpu, &uc); TEST_ASSERT(expected_ucall == actual_ucall, "Guest exited unexpectedly (expected ucall %" PRIu64 @@ -237,9 +336,9 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args) assert_ucall(vcpu, UCALL_SYNC); break; case ITERATION_MARK_IDLE: - mark_vcpu_memory_idle(vm, vcpu_args); + pageidle_mark_vcpu_memory_idle(vm, vcpu_args); break; - }; + } vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } @@ -289,15 +388,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus, static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus) { + if (use_lru_gen) + return lru_gen_mark_memory_idle(vm); + /* * Even though this parallelizes the work across vCPUs, this is still a * very slow operation because page_idle forces the test to mark one pfn - * at a time and the clear_young notifier serializes on the KVM MMU + * at a time and the clear_young notifier may serialize on the KVM MMU * lock. */ pr_debug("Marking VM memory idle (slow)...\n"); iteration_work = ITERATION_MARK_IDLE; - run_iteration(vm, nr_vcpus, "Mark memory idle"); + run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)"); } static void run_test(enum vm_guest_mode mode, void *arg) @@ -309,11 +411,38 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); + /* + * If guest_page_size is larger than the host's page size, the + * guest (memstress) will only fault in a subset of the host's pages. + */ + test_pages = params->nr_vcpus * params->vcpu_memory_bytes / + max(memstress_args.guest_page_size, + (u64)getpagesize()); + memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); + if (use_lru_gen) { + struct memcg_stats stats; + + /* + * Do a page table scan now. Following initial population, aging + * may not cause the pages to move to a newer generation. Do + * an aging pass now so that future aging passes always move + * pages to a newer generation. + */ + printf("Initial aging pass (lru_gen)\n"); + lru_gen_do_aging(&stats, TEST_MEMCG_NAME); + TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages, + "Not all pages accounted for (looking for %ld). " + "Was the memcg set up correctly?", test_pages); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory"); + lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME); + lru_gen_last_gen = find_generation(&stats, test_pages); + } + /* As a control, read and write to the populated memory first. */ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory"); access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory"); @@ -328,6 +457,37 @@ static void run_test(enum vm_guest_mode mode, void *arg) memstress_destroy_vm(vm); } +static int access_tracking_unreliable(void) +{ +#ifdef __x86_64__ + /* + * When running nested, the TLB size may be effectively unlimited (for + * example, this is the case when running on KVM L0), and KVM doesn't + * explicitly flush the TLB when aging SPTEs. As a result, more pages + * are cached and the guest won't see the "idle" bit cleared. + */ + if (this_cpu_has(X86_FEATURE_HYPERVISOR)) { + puts("Skipping idle page count sanity check, because the test is run nested"); + return 1; + } +#endif + /* + * When NUMA balancing is enabled, guest memory will be unmapped to get + * NUMA faults, dropping the Accessed bits. + */ + if (is_numa_balancing_enabled()) { + puts("Skipping idle page count sanity check, because NUMA balancing is enabled"); + return 1; + } + return 0; +} + +static int run_test_for_each_guest_mode(const char *cgroup, void *arg) +{ + for_each_guest_mode(run_test, arg); + return 0; +} + static void help(char *name) { puts(""); @@ -342,11 +502,22 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); + printf(" -w: Control whether the test warns or fails if more than 10%%\n" + " of pages are still seen as idle/old after accessing guest\n" + " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n" + " mode, the test fails by default, but switches to warn only\n" + " if NUMA balancing is enabled or the test detects it's running\n" + " in a VM.\n"); backing_src_help("-s"); puts(""); exit(0); } +void destroy_cgroup(char *cg) +{ + printf("Destroying cgroup: %s\n", cg); +} + int main(int argc, char *argv[]) { struct test_params params = { @@ -354,12 +525,13 @@ int main(int argc, char *argv[]) .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .nr_vcpus = 1, }; + char *new_cg = NULL; int page_idle_fd; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -376,6 +548,11 @@ int main(int argc, char *argv[]) case 's': params.backing_src = parse_backing_src_type(optarg); break; + case 'w': + idle_pages_warn_only = + atoi_non_negative("Idle pages warning", + optarg); + break; case 'h': default: help(argv[0]); @@ -383,12 +560,50 @@ int main(int argc, char *argv[]) } } - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "CONFIG_IDLE_PAGE_TRACKING is not enabled"); - close(page_idle_fd); + if (idle_pages_warn_only == -1) + idle_pages_warn_only = access_tracking_unreliable(); + + if (lru_gen_usable()) { + bool cg_created = true; + int ret; - for_each_guest_mode(run_test, ¶ms); + puts("Using lru_gen for aging"); + use_lru_gen = true; + + if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory")) + ksft_exit_skip("Cannot find memory cgroup controller\n"); + + new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME); + printf("Creating cgroup: %s\n", new_cg); + if (cg_create(new_cg)) { + if (errno == EEXIST) { + printf("Found existing cgroup"); + cg_created = false; + } else { + ksft_exit_skip("could not create new cgroup: %s\n", new_cg); + } + } + + /* + * This will fork off a new process to run the test within + * a new memcg, so we need to properly propagate the return + * value up. + */ + ret = cg_run(new_cg, &run_test_for_each_guest_mode, ¶ms); + if (cg_created) + cg_destroy(new_cg); + if (ret < 0) + TEST_FAIL("child did not spawn or was abnormally killed"); + if (ret) + return ret; + } else { + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); + close(page_idle_fd); + + puts("Using page_idle for aging"); + run_test_for_each_guest_mode(NULL, ¶ms); + } return 0; } diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..90c475a61b22 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -78,9 +78,9 @@ static void *test_vcpu_run(void *arg) return NULL; } -static uint32_t test_get_pcpu(void) +static u32 test_get_pcpu(void) { - uint32_t pcpu; + u32 pcpu; unsigned int nproc_conf; cpu_set_t online_cpuset; @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; - uint32_t new_pcpu = test_get_pcpu(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); + u32 new_pcpu = test_get_pcpu(); pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c index cef8f7323ceb..8a019cbaf4c4 100644 --- a/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c @@ -66,7 +66,7 @@ static void test_guest_raz(struct kvm_vcpu *vcpu) } } -static uint64_t raz_wi_reg_ids[] = { +static u64 raz_wi_reg_ids[] = { KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1), KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1), KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1), @@ -94,8 +94,8 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) int i; for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) { - uint64_t reg_id = raz_wi_reg_ids[i]; - uint64_t val; + u64 reg_id = raz_wi_reg_ids[i]; + u64 val; val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); @@ -111,7 +111,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) } } -static uint64_t raz_invariant_reg_ids[] = { +static u64 raz_invariant_reg_ids[] = { KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1), KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)), KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1), @@ -123,8 +123,8 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) int i, r; for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) { - uint64_t reg_id = raz_invariant_reg_ids[i]; - uint64_t val; + u64 reg_id = raz_invariant_reg_ids[i]; + u64 val; val = vcpu_get_reg(vcpu, reg_id); TEST_ASSERT_EQ(val, 0); @@ -142,11 +142,11 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) { - uint64_t val, el0; + u64 val, el0; val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); return el0 == ID_AA64PFR0_EL1_EL0_IMP; } diff --git a/tools/testing/selftests/kvm/arm64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c index eeba1cc87ff8..5fa5c0ec2b3e 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer.c @@ -56,7 +56,7 @@ static void guest_validate_irq(unsigned int intid, struct test_vcpu_shared_data *shared_data) { enum guest_stage stage = shared_data->guest_stage; - uint64_t xcnt = 0, xcnt_diff_us, cval = 0; + u64 xcnt = 0, xcnt_diff_us, cval = 0; unsigned long xctl = 0; unsigned int timer_irq = 0; unsigned int accessor; @@ -105,7 +105,7 @@ static void guest_validate_irq(unsigned int intid, static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid = gic_get_and_ack_irq(); - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; guest_validate_irq(intid, shared_data); @@ -116,7 +116,7 @@ static void guest_irq_handler(struct ex_regs *regs) static void guest_run_stage(struct test_vcpu_shared_data *shared_data, enum guest_stage stage) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; shared_data->guest_stage = stage; shared_data->nr_iter = 0; @@ -140,7 +140,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, static void guest_code(void) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; local_irq_disable(); @@ -165,10 +165,8 @@ static void guest_code(void) static void test_init_timer_irq(struct kvm_vm *vm) { /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]); + vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } -static int gic_fd; - struct kvm_vm *test_vm_create(void) { struct kvm_vm *vm; unsigned int i; int nr_vcpus = test_args.nr_vcpus; + TEST_REQUIRE(kvm_supports_vgic_v3()); + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); vm_init_descriptor_tables(vm); @@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); @@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void) void test_vm_cleanup(struct kvm_vm *vm) { - close(gic_fd); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..f7625eb711d6 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -22,25 +22,26 @@ #include "gic.h" #include "vgic.h" -static const uint64_t CVAL_MAX = ~0ULL; +/* Depends on counter width. */ +static u64 CVAL_MAX; /* tval is a signed 32-bit int. */ -static const int32_t TVAL_MAX = INT32_MAX; -static const int32_t TVAL_MIN = INT32_MIN; +static const s32 TVAL_MAX = INT32_MAX; +static const s32 TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ -static const uint32_t TIMEOUT_NO_IRQ_US = 50000; +static const u32 TIMEOUT_NO_IRQ_US = 50000; -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static u64 DEF_CNT; /* Number of runs. */ -static const uint32_t NR_TEST_ITERS_DEF = 5; +static const u32 NR_TEST_ITERS_DEF = 5; /* Default wait test time in ms. */ -static const uint32_t WAIT_TEST_MS = 10; +static const u32 WAIT_TEST_MS = 10; /* Default "long" wait test time in ms. */ -static const uint32_t LONG_WAIT_TEST_MS = 100; +static const u32 LONG_WAIT_TEST_MS = 100; /* Shared with IRQ handler. */ struct test_vcpu_shared_data { @@ -52,9 +53,9 @@ struct test_args { /* Virtual or physical timer and counter tests. */ enum arch_timer timer; /* Delay used for most timer tests. */ - uint64_t wait_ms; + u64 wait_ms; /* Delay used in the test_long_timer_delays test. */ - uint64_t long_wait_ms; + u64 long_wait_ms; /* Number of iterations. */ int iterations; /* Whether to test the physical timer. */ @@ -81,12 +82,12 @@ enum sync_cmd { NO_USERSPACE_CMD, }; -typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); +typedef void (*sleep_method_t)(enum arch_timer timer, u64 usec); -static void sleep_poll(enum arch_timer timer, uint64_t usec); -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); -static void sleep_migrate(enum arch_timer timer, uint64_t usec); +static void sleep_poll(enum arch_timer timer, u64 usec); +static void sleep_sched_poll(enum arch_timer timer, u64 usec); +static void sleep_in_userspace(enum arch_timer timer, u64 usec); +static void sleep_migrate(enum arch_timer timer, u64 usec); sleep_method_t sleep_method[] = { sleep_poll, @@ -114,14 +115,14 @@ enum timer_view { TIMER_TVAL, }; -static void assert_irqs_handled(uint32_t n) +static void assert_irqs_handled(u32 n) { int h = atomic_read(&shared_data.handled); __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); } -static void userspace_cmd(uint64_t cmd) +static void userspace_cmd(u64 cmd) { GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); } @@ -131,12 +132,12 @@ static void userspace_migrate_vcpu(void) userspace_cmd(USERSPACE_MIGRATE_SELF); } -static void userspace_sleep(uint64_t usecs) +static void userspace_sleep(u64 usecs) { GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); } -static void set_counter(enum arch_timer timer, uint64_t counter) +static void set_counter(enum arch_timer timer, u64 counter) { GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); } @@ -145,8 +146,8 @@ static void guest_irq_handler(struct ex_regs *regs) { unsigned int intid = gic_get_and_ack_irq(); enum arch_timer timer; - uint64_t cnt, cval; - uint32_t ctl; + u64 cnt, cval; + u32 ctl; bool timer_condition, istatus; if (intid == IAR_SPURIOUS) { @@ -177,8 +178,8 @@ out: gic_set_eoi(intid); } -static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, - uint32_t ctl) +static void set_cval_irq(enum arch_timer timer, u64 cval_cycles, + u32 ctl) { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); @@ -186,16 +187,16 @@ static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, timer_set_ctl(timer, ctl); } -static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, - uint32_t ctl) +static void set_tval_irq(enum arch_timer timer, u64 tval_cycles, + u32 ctl) { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); } -static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, +static void set_xval_irq(enum arch_timer timer, u64 xval, u32 ctl, enum timer_view tv) { switch (tv) { @@ -274,13 +275,13 @@ static void wait_migrate_poll_for_irq(void) * Sleep for usec microseconds by polling in the guest or in * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). */ -static void guest_poll(enum arch_timer test_timer, uint64_t usec, +static void guest_poll(enum arch_timer test_timer, u64 usec, enum sync_cmd usp_cmd) { - uint64_t cycles = usec_to_cycles(usec); + u64 cycles = usec_to_cycles(usec); /* Whichever timer we are testing with, sleep with the other. */ enum arch_timer sleep_timer = 1 - test_timer; - uint64_t start = timer_get_cntct(sleep_timer); + u64 start = timer_get_cntct(sleep_timer); while ((timer_get_cntct(sleep_timer) - start) < cycles) { if (usp_cmd == NO_USERSPACE_CMD) @@ -290,22 +291,22 @@ static void guest_poll(enum arch_timer test_timer, uint64_t usec, } } -static void sleep_poll(enum arch_timer timer, uint64_t usec) +static void sleep_poll(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, NO_USERSPACE_CMD); } -static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +static void sleep_sched_poll(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, USERSPACE_SCHED_YIELD); } -static void sleep_migrate(enum arch_timer timer, uint64_t usec) +static void sleep_migrate(enum arch_timer timer, u64 usec) { guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); } -static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +static void sleep_in_userspace(enum arch_timer timer, u64 usec) { userspace_sleep(usec); } @@ -314,15 +315,15 @@ static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) * Reset the timer state to some nice values like the counter not being close * to the edge, and the control register masked and disabled. */ -static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +static void reset_timer_state(enum arch_timer timer, u64 cnt) { set_counter(timer, cnt); timer_set_ctl(timer, CTL_IMASK); } -static void test_timer_xval(enum arch_timer timer, uint64_t xval, +static void test_timer_xval(enum arch_timer timer, u64 xval, enum timer_view tv, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { local_irq_disable(); @@ -347,23 +348,23 @@ static void test_timer_xval(enum arch_timer timer, uint64_t xval, * the "runner", like: tools/testing/selftests/kselftest/runner.sh. */ -static void test_timer_cval(enum arch_timer timer, uint64_t cval, +static void test_timer_cval(enum arch_timer timer, u64 cval, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); } -static void test_timer_tval(enum arch_timer timer, int32_t tval, +static void test_timer_tval(enum arch_timer timer, s32 tval, irq_wait_method_t wm, bool reset_state, - uint64_t reset_cnt) + u64 reset_cnt) { - test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + test_timer_xval(timer, (u64)tval, TIMER_TVAL, wm, reset_state, reset_cnt); } -static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, - uint64_t usec, enum timer_view timer_view, +static void test_xval_check_no_irq(enum arch_timer timer, u64 xval, + u64 usec, enum timer_view timer_view, sleep_method_t guest_sleep) { local_irq_disable(); @@ -378,17 +379,17 @@ static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, assert_irqs_handled(0); } -static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, - uint64_t usec, sleep_method_t wm) +static void test_cval_no_irq(enum arch_timer timer, u64 cval, + u64 usec, sleep_method_t wm) { test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); } -static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, +static void test_tval_no_irq(enum arch_timer timer, s32 tval, u64 usec, sleep_method_t wm) { - /* tval will be cast to an int32_t in test_xval_check_no_irq */ - test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); + /* tval will be cast to an s32 in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (u64)tval, usec, TIMER_TVAL, wm); } /* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ @@ -462,7 +463,7 @@ static void test_timers_fired_multiple_times(enum arch_timer timer) * timeout for the wait: we use the wfi instruction. */ static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, - int32_t delta_1_ms, int32_t delta_2_ms) + s32 delta_1_ms, s32 delta_2_ms) { local_irq_disable(); reset_timer_state(timer, DEF_CNT); @@ -487,7 +488,7 @@ static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm static void test_reprogram_timers(enum arch_timer timer) { int i; - uint64_t base_wait = test_args.wait_ms; + u64 base_wait = test_args.wait_ms; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { /* @@ -503,8 +504,8 @@ static void test_reprogram_timers(enum arch_timer timer) static void test_basic_functionality(enum arch_timer timer) { - int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + s32 tval = (s32)msec_to_cycles(test_args.wait_ms); + u64 cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -592,7 +593,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t reset_timer_state(timer, DEF_CNT); set_cval_irq(timer, - (uint64_t) TVAL_MAX + + (u64)TVAL_MAX + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); set_counter(timer, TVAL_MAX); @@ -607,7 +608,7 @@ static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t /* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ static void test_timers_above_tval_max(enum arch_timer timer) { - uint64_t cval; + u64 cval; int i; /* @@ -637,8 +638,8 @@ static void test_timers_above_tval_max(enum arch_timer timer) * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and * then waits for an IRQ. */ -static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, - uint64_t xval, uint64_t cnt_2, +static void test_set_cnt_after_xval(enum arch_timer timer, u64 cnt_1, + u64 xval, u64 cnt_2, irq_wait_method_t wm, enum timer_view tv) { local_irq_disable(); @@ -661,8 +662,8 @@ static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, * then waits for an IRQ. */ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t xval, - uint64_t cnt_2, + u64 cnt_1, u64 xval, + u64 cnt_2, sleep_method_t guest_sleep, enum timer_view tv) { @@ -683,31 +684,31 @@ static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, timer_set_ctl(timer, CTL_IMASK); } -static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, - int32_t tval, uint64_t cnt_2, +static void test_set_cnt_after_tval(enum arch_timer timer, u64 cnt_1, + s32 tval, u64 cnt_2, irq_wait_method_t wm) { test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); } -static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, - uint64_t cval, uint64_t cnt_2, +static void test_set_cnt_after_cval(enum arch_timer timer, u64 cnt_1, + u64 cval, u64 cnt_2, irq_wait_method_t wm) { test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); } static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, - uint64_t cnt_1, int32_t tval, - uint64_t cnt_2, sleep_method_t wm) + u64 cnt_1, s32 tval, + u64 cnt_2, sleep_method_t wm) { test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); } static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, - uint64_t cnt_1, uint64_t cval, - uint64_t cnt_2, sleep_method_t wm) + u64 cnt_1, u64 cval, + u64 cnt_2, sleep_method_t wm) { test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); @@ -717,7 +718,7 @@ static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, static void test_move_counters_ahead_of_timers(enum arch_timer timer) { int i; - int32_t tval; + s32 tval; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { irq_wait_method_t wm = irq_wait_method[i]; @@ -729,14 +730,7 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); tval = TVAL_MAX; - test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, - wm); - } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); + test_set_cnt_after_tval(timer, 0, tval, (u64)tval + 1, wm); } } @@ -759,8 +753,8 @@ static void test_move_counters_behind_timers(enum arch_timer timer) static void test_timers_in_the_past(enum arch_timer timer) { - int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); - uint64_t cval; + s32 tval = -1 * (s32)msec_to_cycles(test_args.wait_ms); + u64 cval; int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -795,8 +789,8 @@ static void test_timers_in_the_past(enum arch_timer timer) static void test_long_timer_delays(enum arch_timer timer) { - int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); - uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + s32 tval = (s32)msec_to_cycles(test_args.long_wait_ms); + u64 cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); int i; for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { @@ -849,17 +843,17 @@ static void guest_code(enum arch_timer timer) GUEST_DONE(); } -static uint32_t next_pcpu(void) +static cpu_set_t default_cpuset; + +static u32 next_pcpu(void) { - uint32_t max = get_nprocs(); - uint32_t cur = sched_getcpu(); - uint32_t next = cur; - cpu_set_t cpuset; + u32 max = get_nprocs(); + u32 cur = sched_getcpu(); + u32 next = cur; + cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); - sched_getaffinity(0, sizeof(cpuset), &cpuset); - do { next = (next + 1) % CPU_SETSIZE; } while (!CPU_ISSET(next, &cpuset)); @@ -867,26 +861,7 @@ static uint32_t next_pcpu(void) return next; } -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - -static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, u64 cnt, enum arch_timer timer) { if (timer == PHYSICAL) @@ -898,7 +873,7 @@ static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) { enum sync_cmd cmd = uc->args[1]; - uint64_t val = uc->args[2]; + u64 val = uc->args[2]; enum arch_timer timer = uc->args[3]; switch (cmd) { @@ -912,7 +887,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) sched_yield(); break; case USERSPACE_MIGRATE_SELF: - migrate_self(next_pcpu()); + pin_self_to_cpu(next_pcpu()); break; default: break; @@ -924,7 +899,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) struct ucall uc; /* Start on CPU 0 */ - migrate_self(0); + pin_self_to_cpu(0); while (true) { vcpu_run(vcpu); @@ -948,10 +923,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); - vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + ptimer_irq = vcpu_get_ptimer_irq(vcpu); + vtimer_irq = vcpu_get_vtimer_irq(vcpu); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -973,8 +946,15 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); +} + +static void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); } static void test_print_help(char *name) @@ -986,7 +966,7 @@ static void test_print_help(char *name) pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS); pr_info("\t-p: Test physical timer (default: true)\n"); pr_info("\t-v: Test virtual timer (default: true)\n"); @@ -1035,6 +1015,17 @@ static bool parse_args(int argc, char *argv[]) return false; } +static void set_counter_defaults(void) +{ + const u64 MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + u64 freq = read_sysreg(CNTFRQ_EL0); + int width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -1043,19 +1034,24 @@ int main(int argc, char *argv[]) /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); + TEST_REQUIRE(kvm_supports_vgic_v3()); + if (!parse_args(argc, argv)) exit(KSFT_SKIP); + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); + if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; diff --git a/tools/testing/selftests/kvm/arm64/at.c b/tools/testing/selftests/kvm/arm64/at.c new file mode 100644 index 000000000000..ce5d312ef6ba --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/at.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes. + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +#define TEST_ADDR 0x80000000 + +enum { + CLEAR_ACCESS_FLAG, +}; + +static u64 *ptep_hva; + +#define copy_el2_to_el1(reg) \ + write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12) + +/* Yes, this is an ugly hack */ +#define __at(op, addr) write_sysreg_s(addr, op) + +#define test_at_insn(op, expect_fault) \ +do { \ + u64 par, fsc; \ + bool fault; \ + \ + GUEST_SYNC(CLEAR_ACCESS_FLAG); \ + \ + __at(OP_AT_##op, TEST_ADDR); \ + isb(); \ + par = read_sysreg(par_el1); \ + \ + fault = par & SYS_PAR_EL1_F; \ + fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \ + \ + __GUEST_ASSERT((expect_fault) == fault, \ + "AT "#op": %sexpected fault (par: %lx)1", \ + (expect_fault) ? "" : "un", par); \ + if ((expect_fault)) { \ + __GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \ + "AT "#op": expected access flag fault (par: %lx)", \ + par); \ + } else { \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \ + GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \ + GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \ + } \ +} while (0) + +static void test_at(bool expect_fault) +{ + test_at_insn(S1E2R, expect_fault); + test_at_insn(S1E2W, expect_fault); + + /* Reuse the stage-1 MMU context from EL2 at EL1 */ + copy_el2_to_el1(SCTLR); + copy_el2_to_el1(MAIR); + copy_el2_to_el1(TCR); + copy_el2_to_el1(TTBR0); + copy_el2_to_el1(TTBR1); + + /* Disable stage-2 translation and enter a non-host context */ + write_sysreg(0, vtcr_el2); + write_sysreg(0, vttbr_el2); + sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0); + isb(); + + test_at_insn(S1E1R, expect_fault); + test_at_insn(S1E1W, expect_fault); +} + +static void guest_code(void) +{ + sysreg_clear_set(tcr_el1, TCR_HA, 0); + isb(); + + test_at(true); + + if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1))) + GUEST_DONE(); + + sysreg_clear_set(tcr_el1, 0, TCR_HA); + isb(); + test_at(false); + + GUEST_DONE(); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + switch (uc->args[1]) { + case CLEAR_ACCESS_FLAG: + /* + * Delete + reinstall the memslot to invalidate stage-2 + * mappings of the stage-1 page tables, allowing KVM to + * potentially use the 'slow' AT emulation path. + * + * This and clearing the access flag from host userspace + * ensures that the access flag cannot be set speculatively + * and is reliably cleared at the time of the AT instruction. + */ + clear_bit(__ffs(PTE_AF), ptep_hva); + vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]); + break; + default: + TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]); + } +} + +static void run_test(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + return; + case UCALL_SYNC: + handle_sync(vcpu, &uc); + continue; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + return; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + virt_map(vm, TEST_ADDR, TEST_ADDR, 1); + ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3); + run_test(vcpu); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..3eb4b1b6682d 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -31,14 +31,14 @@ extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx; extern unsigned char iter_ss_begin, iter_ss_end; -static volatile uint64_t sw_bp_addr, hw_bp_addr; -static volatile uint64_t wp_addr, wp_data_addr; -static volatile uint64_t svc_addr; -static volatile uint64_t ss_addr[4], ss_idx; -#define PC(v) ((uint64_t)&(v)) +static volatile u64 sw_bp_addr, hw_bp_addr; +static volatile u64 wp_addr, wp_data_addr; +static volatile u64 svc_addr; +static volatile u64 ss_addr[4], ss_idx; +#define PC(v) ((u64)&(v)) #define GEN_DEBUG_WRITE_REG(reg_name) \ -static void write_##reg_name(int num, uint64_t val) \ +static void write_##reg_name(int num, u64 val) \ { \ switch (num) { \ case 0: \ @@ -102,8 +102,8 @@ GEN_DEBUG_WRITE_REG(dbgwvr) static void reset_debug_state(void) { - uint8_t brps, wrps, i; - uint64_t dfr0; + u8 brps, wrps, i; + u64 dfr0; asm volatile("msr daifset, #8"); @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); + brps = FIELD_GET(ID_AA64DFR0_EL1_BRPs, dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); + wrps = FIELD_GET(ID_AA64DFR0_EL1_WRPs, dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + u64 mdscr; asm volatile("msr daifclr, #8"); @@ -149,9 +149,9 @@ static void enable_monitor_debug_exceptions(void) isb(); } -static void install_wp(uint8_t wpn, uint64_t addr) +static void install_wp(u8 wpn, u64 addr) { - uint32_t wcr; + u32 wcr; wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E; write_dbgwcr(wpn, wcr); @@ -162,9 +162,9 @@ static void install_wp(uint8_t wpn, uint64_t addr) enable_monitor_debug_exceptions(); } -static void install_hw_bp(uint8_t bpn, uint64_t addr) +static void install_hw_bp(u8 bpn, u64 addr) { - uint32_t bcr; + u32 bcr; bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E; write_dbgbcr(bpn, bcr); @@ -174,11 +174,10 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr) enable_monitor_debug_exceptions(); } -static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) +static void install_wp_ctx(u8 addr_wp, u8 ctx_bp, u64 addr, u64 ctx) { - uint32_t wcr; - uint64_t ctx_bcr; + u32 wcr; + u64 ctx_bcr; /* Setup a context-aware breakpoint for Linked Context ID Match */ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | @@ -188,7 +187,7 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, /* Setup a linked watchpoint (linked to the context-aware breakpoint) */ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E | - DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT); + DBGWCR_WT_LINK | ((u32)ctx_bp << DBGWCR_LBN_SHIFT); write_dbgwcr(addr_wp, wcr); write_dbgwvr(addr_wp, addr); isb(); @@ -196,10 +195,9 @@ static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr, enable_monitor_debug_exceptions(); } -void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, - uint64_t ctx) +void install_hw_bp_ctx(u8 addr_bp, u8 ctx_bp, u64 addr, u64 ctx) { - uint32_t addr_bcr, ctx_bcr; + u32 addr_bcr, ctx_bcr; /* Setup a context-aware breakpoint for Linked Context ID Match */ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | @@ -213,7 +211,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, */ addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E | DBGBCR_BT_ADDR_LINK_CTX | - ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT); + ((u32)ctx_bp << DBGBCR_LBN_SHIFT); write_dbgbcr(addr_bp, addr_bcr); write_dbgbvr(addr_bp, addr); isb(); @@ -223,7 +221,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + u64 mdscr; asm volatile("msr daifclr, #8"); @@ -234,9 +232,9 @@ static void install_ss(void) static volatile char write_data; -static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +static void guest_code(u8 bpn, u8 wpn, u8 ctx_bpn) { - uint64_t ctx = 0xabcdef; /* a random context number */ + u64 ctx = 0xabcdef; /* a random context number */ /* Software-breakpoint */ reset_debug_state(); @@ -377,8 +375,8 @@ static void guest_svc_handler(struct ex_regs *regs) static void guest_code_ss(int test_cnt) { - uint64_t i; - uint64_t bvr, wvr, w_bvr, w_wvr; + u64 i; + u64 bvr, wvr, w_bvr, w_wvr; for (i = 0; i < test_cnt; i++) { /* Bits [1:0] of dbg{b,w}vr are RES0 */ @@ -416,12 +414,12 @@ static void guest_code_ss(int test_cnt) GUEST_DONE(); } -static int debug_version(uint64_t id_aa64dfr0) +static int debug_version(u64 id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); + return FIELD_GET(ID_AA64DFR0_EL1_DebugVer, id_aa64dfr0); } -static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) +static void test_guest_debug_exceptions(u8 bpn, u8 wpn, u8 ctx_bpn) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -468,8 +466,8 @@ void test_single_step_from_userspace(int test_cnt) struct kvm_vm *vm; struct ucall uc; struct kvm_run *run; - uint64_t pc, cmd; - uint64_t test_pc = 0; + u64 pc, cmd; + u64 test_pc = 0; bool ss_enable = false; struct kvm_guest_debug debug = {}; @@ -506,7 +504,7 @@ void test_single_step_from_userspace(int test_cnt) "Unexpected pc 0x%lx (expected 0x%lx)", pc, test_pc); - if ((pc + 4) == (uint64_t)&iter_ss_end) { + if ((pc + 4) == (u64)&iter_ss_end) { test_pc = 0; debug.control = KVM_GUESTDBG_ENABLE; ss_enable = false; @@ -519,8 +517,8 @@ void test_single_step_from_userspace(int test_cnt) * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should * be the current pc + 4. */ - if ((pc >= (uint64_t)&iter_ss_begin) && - (pc < (uint64_t)&iter_ss_end)) + if ((pc >= (u64)&iter_ss_begin) && + (pc < (u64)&iter_ss_end)) test_pc = pc + 4; else test_pc = 0; @@ -533,20 +531,20 @@ void test_single_step_from_userspace(int test_cnt) * Run debug testing using the various breakpoint#, watchpoint# and * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration. */ -void test_guest_debug_exceptions_all(uint64_t aa64dfr0) +void test_guest_debug_exceptions_all(u64 aa64dfr0) { - uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; + u8 brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base; int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; + brp_num = FIELD_GET(ID_AA64DFR0_EL1_BRPs, aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; + wrp_num = FIELD_GET(ID_AA64DFR0_EL1_WRPs, aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ID_AA64DFR0_EL1_CTX_CMPs, aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); @@ -580,7 +578,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; int opt; int ss_iteration = 10000; - uint64_t aa64dfr0; + u64 aa64dfr0; vm = vm_create_with_one_vcpu(&vcpu, guest_code); aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..d8fe17a6cc59 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_sea_s1ptw_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3)); + + GUEST_DONE(); +} + +static noinline void test_s1ptw_abort_guest(void) +{ + extern char test_s1ptw_abort_insn; + + WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn); + + asm volatile("test_s1ptw_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("Load on S1PTW abort should not retire"); +} + +static void test_s1ptw_abort(void) +{ + struct kvm_vcpu *vcpu; + u64 *ptep, bad_pa; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest, + expect_sea_s1ptw_handler); + + ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2); + bad_pa = BIT(vm->pa_bits) - vm->page_size; + + *ptep &= ~GENMASK(47, 12); + *ptep |= bad_pa; + + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). + */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_amo_guest(void) +{ + /* + * The ISB is entirely unnecessary (and highlights how FEAT_NV2 is borked) + * since the write is redirected to memory. But don't write (intentionally) + * broken code! + */ + sysreg_clear_set(hcr_el2, HCR_EL2_AMO | HCR_EL2_TGE, 0); + isb(); + + GUEST_SYNC(0); + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + /* + * KVM treats the effective value of AMO as 1 when + * HCR_EL2.{E2H,TGE} = {1, 0}, meaning the SError will be taken when + * unmasked. + */ + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror_amo(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_amo_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); + test_s1ptw_abort(); + + if (!test_supports_el2()) + return 0; + + test_serror_amo(); +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d43fb3f49050..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,37 +28,48 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), + REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) @@ -332,9 +349,21 @@ static __u64 base_regs[] = { KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */ - ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ - ARM64_SYS_REG(3, 3, 14, 0, 2), + KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */ + + /* + * EL0 Virtual Timer Registers + * + * WARNING: + * KVM_REG_ARM_TIMER_CVAL and KVM_REG_ARM_TIMER_CNT are not defined + * with the appropriate register encodings. Their values have been + * accidentally swapped. As this is set API, the definitions here + * must be used, rather than ones derived from the encodings. + */ + KVM_ARM64_SYS_REG(SYS_CNTV_CTL_EL0), + KVM_REG_ARM_TIMER_CVAL, + KVM_REG_ARM_TIMER_CNT, + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ @@ -468,6 +497,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -685,6 +715,67 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + +static __u64 el2_e2h0_regs[] = { + /* Empty */ +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -711,6 +802,23 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } +#define EL2_E2H0_SUBLIST \ + EL2_SUBLIST, \ + { \ + .name = "EL2 E2H0", \ + .capability = KVM_CAP_ARM_EL2_E2H0, \ + .feature = KVM_ARM_VCPU_HAS_EL2_E2H0, \ + .regs = el2_e2h0_regs, \ + .regs_n = ARRAY_SIZE(el2_e2h0_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -760,6 +868,124 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_e2h0_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_E2H0_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -767,5 +993,19 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, + + &el2_e2h0_vregs_config, + &el2_e2h0_vregs_pmu_config, + &el2_e2h0_sve_config, + &el2_e2h0_sve_pmu_config, + &el2_e2h0_pauth_config, + &el2_e2h0_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/hello_el2.c b/tools/testing/selftests/kvm/arm64/hello_el2.c new file mode 100644 index 000000000000..bbe6862c6ab1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/hello_el2.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1 + * + * Copyright 2025 Google LLC + */ +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include "ucall.h" + +#include <asm/sysreg.h> + +static void guest_code(void) +{ + u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1); + u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1); + u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4); + + GUEST_ASSERT_EQ(get_current_el(), 2); + GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H); + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1), + ID_AA64MMFR1_EL1_VH_IMP); + + /* + * Traps of the complete ID register space are IMPDEF without FEAT_FGT, + * which is really annoying to deal with in KVM describing E2H as RES1. + * + * If the implementation doesn't honor the trap then expect the register + * to return all zeros. + */ + if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP) + GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0), + ID_AA64MMFR0_EL1_FGT_NI); + else + GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1); + + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu_init init; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2)); + + vm = vm_create(1); + + kvm_get_default_vcpu_target(vm, &init); + init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd); + } + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c new file mode 100644 index 000000000000..3826772fd470 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/host_sve.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls. + * + * Copyright 2025 Arm, Ltd + */ + +#include <errno.h> +#include <signal.h> +#include <sys/auxv.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "ucall_common.h" + +static void guest_code(void) +{ + for (int i = 0; i < 10; i++) { + GUEST_UCALL_NONE(); + } + + GUEST_DONE(); +} + +void handle_sigill(int sig, siginfo_t *info, void *ctx) +{ + ucontext_t *uctx = ctx; + + printf(" < host signal %d >\n", sig); + + /* + * Skip the UDF + */ + uctx->uc_mcontext.pc += 4; +} + +void register_sigill_handler(void) +{ + struct sigaction sa = { + .sa_sigaction = handle_sigill, + .sa_flags = SA_SIGINFO, + }; + sigaction(SIGILL, &sa, NULL); +} + +static void do_sve_roundtrip(void) +{ + unsigned long before, after; + + /* + * Set all bits in a predicate register, force a save/restore via a + * SIGILL (which handle_sigill() will recover from), then report + * whether the value has changed. + */ + asm volatile( + " .arch_extension sve\n" + " ptrue p0.B\n" + " cntp %[before], p0, p0.B\n" + " udf #0\n" + " cntp %[after], p0, p0.B\n" + : [before] "=r" (before), + [after] "=r" (after) + : + : "p0" + ); + + if (before != after) { + TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n", + before, after); + } else { + printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n", + before, after); + } +} + +static void test_run(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + register_sigill_handler(); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + do_sve_roundtrip(); + + while (!guest_done) { + + printf("Running VCPU...\n"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_NONE: + do_sve_roundtrip(); + do_sve_roundtrip(); + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); +} + +int main(void) +{ + /* + * This is testing the host environment, we don't care about + * guest SVE support. + */ + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + printf("SVE not supported\n"); + return KSFT_SKIP; + } + + test_run(); + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c index ec54ec7726e9..5d96cdf382c4 100644 --- a/tools/testing/selftests/kvm/arm64/hypercalls.c +++ b/tools/testing/selftests/kvm/arm64/hypercalls.c @@ -21,22 +21,31 @@ #define KVM_REG_ARM_STD_BMAP_BIT_MAX 0 #define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0 #define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1 +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1 + +#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX) +#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX) +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0 struct kvm_fw_reg_info { - uint64_t reg; /* Register definition */ - uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + u64 reg; /* Register definition */ + u64 max_feat_bit; /* Bit that represents the upper limit of the feature-map */ + u64 reset_val; /* Reset value for the register */ }; #define FW_REG_INFO(r) \ { \ .reg = r, \ .max_feat_bit = r##_BIT_MAX, \ + .reset_val = r##_RESET_VAL \ } static const struct kvm_fw_reg_info fw_reg_info[] = { FW_REG_INFO(KVM_REG_ARM_STD_BMAP), FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP), FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP), + FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2), }; enum test_stage { @@ -50,8 +59,8 @@ enum test_stage { static int stage = TEST_STAGE_REG_IFACE; struct test_hvc_info { - uint32_t func_id; - uint64_t arg1; + u32 func_id; + u64 arg1; }; #define TEST_HVC_INFO(f, a1) \ @@ -99,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) { memset(&res, 0, sizeof(res)); - smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); + do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res); switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: @@ -143,9 +152,9 @@ static void guest_code(void) } struct st_time { - uint32_t rev; - uint32_t attr; - uint64_t st_time; + u32 rev; + u32 attr; + u64 st_time; }; #define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) @@ -153,7 +162,7 @@ struct st_time { static void steal_time_init(struct kvm_vcpu *vcpu) { - uint64_t st_ipa = (ulong)ST_GPA_BASE; + u64 st_ipa = (ulong)ST_GPA_BASE; unsigned int gpages; gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); @@ -165,28 +174,45 @@ static void steal_time_init(struct kvm_vcpu *vcpu) static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) { - uint64_t val; + u64 val; unsigned int i; int ret; for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) { const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; + u64 set_val; - /* First 'read' should be an upper limit of the features supported */ + /* First 'read' should be the reset value for the reg */ val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", - reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); + TEST_ASSERT(val == reg_info->reset_val, + "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", + reg_info->reg, reg_info->reset_val, val); + + if (reg_info->reset_val) + set_val = 0; + else + set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit); - /* Test a 'write' by disabling all the features of the register map */ - ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d", - reg_info->reg, errno); + "Failed to %s all the features of reg: 0x%lx; ret: %d", + (set_val ? "set" : "clear"), reg_info->reg, errno); val = vcpu_get_reg(vcpu, reg_info->reg); - TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); + TEST_ASSERT(val == set_val, + "Expected all the features to be %s for reg: 0x%lx", + (set_val ? "set" : "cleared"), reg_info->reg); + + /* + * If the reg has been set, clear it as test_fw_regs_after_vm_start() + * expects it to be cleared. + */ + if (set_val) { + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); + TEST_ASSERT(ret == 0, + "Failed to clear all the features of reg: 0x%lx; ret: %d", + reg_info->reg, errno); + } /* * Test enabling a feature that's not supported. @@ -203,7 +229,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) { - uint64_t val; + u64 val; unsigned int i; int ret; diff --git a/tools/testing/selftests/kvm/arm64/idreg-idst.c b/tools/testing/selftests/kvm/arm64/idreg-idst.c new file mode 100644 index 000000000000..a3e84701d814 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/idreg-idst.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Access all FEAT_IDST-handled registers that depend on more than + * just FEAT_AA64, and fail if we don't get an a trap with an 0x18 EC. + */ + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool sys64, undef; + +#define __check_sr_read(r) \ + ({ \ + u64 val; \ + \ + sys64 = false; \ + undef = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(!undef, #r " unexpected UNDEF"); \ + __GUEST_ASSERT(sys64, #r " didn't trap"); \ + } while(0) + + +static void guest_code(void) +{ + check_sr_read(CCSIDR2_EL1); + check_sr_read(SMIDR_EL1); + check_sr_read(GMID_EL1); + + GUEST_DONE(); +} + +static void guest_sys64_handler(struct ex_regs *regs) +{ + sys64 = true; + undef = false; + regs->pc += 4; +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + sys64 = false; + undef = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_feat_idst(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* This VM has no MTE, no SME, no CCIDX */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_SYS64, guest_sys64_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u64 mmfr2; + + test_disable_default_vgic(); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + mmfr2 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR2_EL1)); + __TEST_REQUIRE(FIELD_GET(ID_AA64MMFR2_EL1_IDS, mmfr2) > 0, + "FEAT_IDST not supported"); + kvm_vm_free(vm); + + test_guest_feat_idst(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..b5be9133535a --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include <errno.h> +#include <linux/arm-smccc.h> +#include <asm/kvm.h> +#include <kvm_util.h> + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 --- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. - */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. - */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c deleted file mode 100644 index ebd70430c89d..000000000000 --- a/tools/testing/selftests/kvm/arm64/no-vgic-v3.c +++ /dev/null @@ -1,175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -// Check that, on a GICv3 system, not configuring GICv3 correctly -// results in all of the sysregs generating an UNDEF exception. - -#include <test_util.h> -#include <kvm_util.h> -#include <processor.h> - -static volatile bool handled; - -#define __check_sr_read(r) \ - ({ \ - uint64_t val; \ - \ - handled = false; \ - dsb(sy); \ - val = read_sysreg_s(SYS_ ## r); \ - val; \ - }) - -#define __check_sr_write(r) \ - do { \ - handled = false; \ - dsb(sy); \ - write_sysreg_s(0, SYS_ ## r); \ - isb(); \ - } while(0) - -/* Fatal checks */ -#define check_sr_read(r) \ - do { \ - __check_sr_read(r); \ - __GUEST_ASSERT(handled, #r " no read trap"); \ - } while(0) - -#define check_sr_write(r) \ - do { \ - __check_sr_write(r); \ - __GUEST_ASSERT(handled, #r " no write trap"); \ - } while(0) - -#define check_sr_rw(r) \ - do { \ - check_sr_read(r); \ - check_sr_write(r); \ - } while(0) - -static void guest_code(void) -{ - uint64_t val; - - /* - * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having - * hidden the feature at runtime without any other userspace action. - */ - __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), - read_sysreg(id_aa64pfr0_el1)) == 0, - "GICv3 wrongly advertised"); - - /* - * Access all GICv3 registers, and fail if we don't get an UNDEF. - * Note that we happily access all the APxRn registers without - * checking their existance, as all we want to see is a failure. - */ - check_sr_rw(ICC_PMR_EL1); - check_sr_read(ICC_IAR0_EL1); - check_sr_write(ICC_EOIR0_EL1); - check_sr_rw(ICC_HPPIR0_EL1); - check_sr_rw(ICC_BPR0_EL1); - check_sr_rw(ICC_AP0R0_EL1); - check_sr_rw(ICC_AP0R1_EL1); - check_sr_rw(ICC_AP0R2_EL1); - check_sr_rw(ICC_AP0R3_EL1); - check_sr_rw(ICC_AP1R0_EL1); - check_sr_rw(ICC_AP1R1_EL1); - check_sr_rw(ICC_AP1R2_EL1); - check_sr_rw(ICC_AP1R3_EL1); - check_sr_write(ICC_DIR_EL1); - check_sr_read(ICC_RPR_EL1); - check_sr_write(ICC_SGI1R_EL1); - check_sr_write(ICC_ASGI1R_EL1); - check_sr_write(ICC_SGI0R_EL1); - check_sr_read(ICC_IAR1_EL1); - check_sr_write(ICC_EOIR1_EL1); - check_sr_rw(ICC_HPPIR1_EL1); - check_sr_rw(ICC_BPR1_EL1); - check_sr_rw(ICC_CTLR_EL1); - check_sr_rw(ICC_IGRPEN0_EL1); - check_sr_rw(ICC_IGRPEN1_EL1); - - /* - * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can - * be RAO/WI. Engage in non-fatal accesses, starting with a - * write of 0 to try and disable SRE, and let's see if it - * sticks. - */ - __check_sr_write(ICC_SRE_EL1); - if (!handled) - GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); - - val = __check_sr_read(ICC_SRE_EL1); - if (!handled) { - __GUEST_ASSERT((val & BIT(0)), - "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); - GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); - } - - GUEST_DONE(); -} - -static void guest_undef_handler(struct ex_regs *regs) -{ - /* Success, we've gracefully exploded! */ - handled = true; - regs->pc += 4; -} - -static void test_run_vcpu(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - do { - vcpu_run(vcpu); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_PRINTF: - printf("%s", uc.buffer); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } while (uc.cmd != UCALL_DONE); -} - -static void test_guest_no_gicv3(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - - /* Create a VM without a GICv3 */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_ELx_EC_UNKNOWN, guest_undef_handler); - - test_run_vcpu(vcpu); - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - uint64_t pfr0; - - vm = vm_create_with_one_vcpu(&vcpu, NULL); - pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), - "GICv3 not supported."); - kvm_vm_free(vm); - - test_guest_no_gicv3(); - - return 0; -} diff --git a/tools/testing/selftests/kvm/arm64/no-vgic.c b/tools/testing/selftests/kvm/arm64/no-vgic.c new file mode 100644 index 000000000000..25b2e3222f68 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/no-vgic.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3-capable system (GICv3 native, or GICv5 with +// FEAT_GCIE_LEGACY), not configuring GICv3 correctly results in all +// of the sysregs generating an UNDEF exception. Do the same for GICv5 +// on a GICv5 host. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#include <arm64/gic_v5.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + u64 val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while (0) + +#define __check_gicv5_gicr_op(r) \ + ({ \ + u64 val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(GICV5_OP_GICR_ ## r); \ + val; \ + }) + +#define __check_gicv5_gic_op(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, GICV5_OP_GIC_ ## r); \ + isb(); \ + } while (0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while (0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while (0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while (0) + +#define check_gicv5_gicr_op(r) \ + do { \ + __check_gicv5_gicr_op(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while (0) + +#define check_gicv5_gic_op(r) \ + do { \ + __check_gicv5_gic_op(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while (0) + +static void guest_code_gicv3(void) +{ + u64 val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR0_EL1_GIC, + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existence, as all we want to see is a failure. + */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_code_gicv5(void) +{ + /* + * Check that we advertise that ID_AA64PFR2_EL1.GCIE == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ID_AA64PFR2_EL1_GCIE, + read_sysreg_s(SYS_ID_AA64PFR2_EL1)) == 0, + "GICv5 wrongly advertised"); + + /* + * Try all GICv5 instructions, and fail if we don't get an UNDEF. + */ + check_gicv5_gic_op(CDAFF); + check_gicv5_gic_op(CDDI); + check_gicv5_gic_op(CDDIS); + check_gicv5_gic_op(CDEOI); + check_gicv5_gic_op(CDHM); + check_gicv5_gic_op(CDPEND); + check_gicv5_gic_op(CDPRI); + check_gicv5_gic_op(CDRCFG); + check_gicv5_gicr_op(CDIA); + check_gicv5_gicr_op(CDNMIA); + + /* Check General System Register acccesses */ + check_sr_rw(ICC_APR_EL1); + check_sr_rw(ICC_CR0_EL1); + check_sr_read(ICC_HPPIR_EL1); + check_sr_read(ICC_IAFFIDR_EL1); + check_sr_rw(ICC_ICSR_EL1); + check_sr_read(ICC_IDR0_EL1); + check_sr_rw(ICC_PCR_EL1); + + /* Check PPI System Register accessess */ + check_sr_rw(ICC_PPI_CACTIVER0_EL1); + check_sr_rw(ICC_PPI_CACTIVER1_EL1); + check_sr_rw(ICC_PPI_SACTIVER0_EL1); + check_sr_rw(ICC_PPI_SACTIVER1_EL1); + check_sr_rw(ICC_PPI_CPENDR0_EL1); + check_sr_rw(ICC_PPI_CPENDR1_EL1); + check_sr_rw(ICC_PPI_SPENDR0_EL1); + check_sr_rw(ICC_PPI_SPENDR1_EL1); + check_sr_rw(ICC_PPI_ENABLER0_EL1); + check_sr_rw(ICC_PPI_ENABLER1_EL1); + check_sr_read(ICC_PPI_HMR0_EL1); + check_sr_read(ICC_PPI_HMR1_EL1); + check_sr_rw(ICC_PPI_PRIORITYR0_EL1); + check_sr_rw(ICC_PPI_PRIORITYR1_EL1); + check_sr_rw(ICC_PPI_PRIORITYR2_EL1); + check_sr_rw(ICC_PPI_PRIORITYR3_EL1); + check_sr_rw(ICC_PPI_PRIORITYR4_EL1); + check_sr_rw(ICC_PPI_PRIORITYR5_EL1); + check_sr_rw(ICC_PPI_PRIORITYR6_EL1); + check_sr_rw(ICC_PPI_PRIORITYR7_EL1); + check_sr_rw(ICC_PPI_PRIORITYR8_EL1); + check_sr_rw(ICC_PPI_PRIORITYR9_EL1); + check_sr_rw(ICC_PPI_PRIORITYR10_EL1); + check_sr_rw(ICC_PPI_PRIORITYR11_EL1); + check_sr_rw(ICC_PPI_PRIORITYR12_EL1); + check_sr_rw(ICC_PPI_PRIORITYR13_EL1); + check_sr_rw(ICC_PPI_PRIORITYR14_EL1); + check_sr_rw(ICC_PPI_PRIORITYR15_EL1); + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! */ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_vgic(void *guest_code) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GIC */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_ELx_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool has_v3, has_v5; + u64 pfr; + + test_disable_default_vgic(); + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + has_v3 = !!FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr); + + pfr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR2_EL1)); + has_v5 = !!FIELD_GET(ID_AA64PFR2_EL1_GCIE, pfr); + + kvm_vm_free(vm); + + __TEST_REQUIRE(has_v3 || has_v5, + "Neither GICv3 nor GICv5 supported."); + + if (has_v3) { + pr_info("Testing no-vgic-v3\n"); + test_guest_no_vgic(guest_code_gicv3); + } else { + pr_info("No GICv3 support: skipping no-vgic-v3 test\n"); + } + + if (has_v5) { + pr_info("Testing no-vgic-v5\n"); + test_guest_no_vgic(guest_code_gicv5); + } else { + pr_info("No GICv5 support: skipping no-vgic-v5 test\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..6bb3d82906b2 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -23,7 +23,7 @@ #define TEST_PTE_GVA 0xb0000000 #define TEST_DATA 0x0123456789ABCDEF -static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; +static u64 *guest_test_memory = (u64 *)TEST_GVA; #define CMD_NONE (0) #define CMD_SKIP_TEST (1ULL << 1) @@ -48,7 +48,7 @@ static struct event_cnt { struct test_desc { const char *name; - uint64_t mem_mark_cmd; + u64 mem_mark_cmd; /* Skip the test if any prepare function returns false */ bool (*guest_prepare[PREPARE_FN_NR])(void); void (*guest_test)(void); @@ -59,8 +59,8 @@ struct test_desc { void (*iabt_handler)(struct ex_regs *regs); void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); void (*fail_vcpu_run_handler)(int ret); - uint32_t pt_memslot_flags; - uint32_t data_memslot_flags; + u32 pt_memslot_flags; + u32 data_memslot_flags; bool skip; struct event_cnt expected_events; }; @@ -70,9 +70,9 @@ struct test_params { struct test_desc *test_desc; }; -static inline void flush_tlb_page(uint64_t vaddr) +static inline void flush_tlb_page(gva_t gva) { - uint64_t page = vaddr >> 12; + gva_t page = gva >> 12; dsb(ishst); asm volatile("tlbi vaae1is, %0" :: "r" (page)); @@ -82,7 +82,7 @@ static inline void flush_tlb_page(uint64_t vaddr) static void guest_write64(void) { - uint64_t val; + u64 val; WRITE_ONCE(*guest_test_memory, TEST_DATA); val = READ_ONCE(*guest_test_memory); @@ -92,17 +92,17 @@ static void guest_write64(void) /* Check the system for atomic instructions. */ static bool guest_check_lse(void) { - uint64_t isar0 = read_sysreg(id_aa64isar0_el1); - uint64_t atomic; + u64 isar0 = read_sysreg(id_aa64isar0_el1); + u64 atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); + atomic = FIELD_GET(ID_AA64ISAR0_EL1_ATOMIC, isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { - uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); + u64 dczid = read_sysreg(dczid_el0); + u64 dzp = FIELD_GET(DCZID_EL0_DZP, dczid); return dzp == 0; } @@ -110,7 +110,7 @@ static bool guest_check_dc_zva(void) /* Compare and swap instruction. */ static void guest_cas(void) { - uint64_t val; + u64 val; GUEST_ASSERT(guest_check_lse()); asm volatile(".arch_extension lse\n" @@ -122,7 +122,7 @@ static void guest_cas(void) static void guest_read64(void) { - uint64_t val; + u64 val; val = READ_ONCE(*guest_test_memory); GUEST_ASSERT_EQ(val, 0); @@ -131,7 +131,7 @@ static void guest_read64(void) /* Address translation instruction */ static void guest_at(void) { - uint64_t par; + u64 par; asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); isb(); @@ -148,7 +148,7 @@ static void guest_at(void) */ static void guest_dc_zva(void) { - uint16_t val; + u16 val; asm volatile("dc zva, %0" :: "r" (guest_test_memory)); dsb(ish); @@ -164,8 +164,8 @@ static void guest_dc_zva(void) */ static void guest_ld_preidx(void) { - uint64_t val; - uint64_t addr = TEST_GVA - 8; + u64 val; + u64 addr = TEST_GVA - 8; /* * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is @@ -179,8 +179,8 @@ static void guest_ld_preidx(void) static void guest_st_preidx(void) { - uint64_t val = TEST_DATA; - uint64_t addr = TEST_GVA - 8; + u64 val = TEST_DATA; + u64 addr = TEST_GVA - 8; asm volatile("str %0, [%1, #8]!" : "+r" (val), "+r" (addr)); @@ -191,15 +191,15 @@ static void guest_st_preidx(void) static bool guest_set_ha(void) { - uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); - uint64_t hadbs, tcr; + u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1); + u64 hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); + hadbs = FIELD_GET(ID_AA64MMFR1_EL1_HAFDBS, mmfr1); if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); @@ -208,7 +208,7 @@ static bool guest_set_ha(void) static bool guest_clear_pte_af(void) { - *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + *((u64 *)TEST_PTE_GVA) &= ~PTE_AF; flush_tlb_page(TEST_GVA); return true; @@ -217,7 +217,7 @@ static bool guest_clear_pte_af(void) static void guest_check_pte_af(void) { dsb(ish); - GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); + GUEST_ASSERT_EQ(*((u64 *)TEST_PTE_GVA) & PTE_AF, PTE_AF); } static void guest_check_write_in_dirty_log(void) @@ -302,26 +302,26 @@ static void no_iabt_handler(struct ex_regs *regs) static struct uffd_args { char *copy; void *hva; - uint64_t paging_size; + u64 paging_size; } pt_args, data_args; /* Returns true to continue the test, and false if it should be skipped. */ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, struct uffd_args *args) { - uint64_t addr = msg->arg.pagefault.address; - uint64_t flags = msg->arg.pagefault.flags; + u64 addr = msg->arg.pagefault.address; + u64 flags = msg->arg.pagefault.flags; struct uffdio_copy copy; int ret; TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - TEST_ASSERT_EQ(addr, (uint64_t)args->hva); + TEST_ASSERT_EQ(addr, (u64)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); - copy.src = (uint64_t)args->copy; + copy.src = (u64)args->copy; copy.dst = addr; copy.len = args->paging_size; copy.mode = 0; @@ -407,7 +407,7 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, struct userspace_mem_region *region) { void *hva = (void *)region->region.userspace_addr; - uint64_t paging_size = region->region.memory_size; + u64 paging_size = region->region.memory_size; int ret, fd = region->fd; if (fd != -1) { @@ -438,7 +438,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) { - uint64_t data; + u64 data; memcpy(&data, run->mmio.data, sizeof(data)); pr_debug("addr=%lld len=%d w=%d data=%lx\n", @@ -449,11 +449,11 @@ static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) static bool check_write_in_dirty_log(struct kvm_vm *vm, struct userspace_mem_region *region, - uint64_t host_pg_nr) + u64 host_pg_nr) { unsigned long *bmap; bool first_page_dirty; - uint64_t size = region->region.memory_size; + u64 size = region->region.memory_size; /* getpage_size() is not always equal to vm->page_size */ bmap = bitmap_zalloc(size / getpagesize()); @@ -468,7 +468,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) { struct userspace_mem_region *data_region, *pt_region; bool continue_test = true; - uint64_t pte_gpa, pte_pg; + u64 pte_gpa, pte_pg; data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); pt_region = vm_get_mem_region(vm, MEM_REGION_PT); @@ -510,7 +510,7 @@ void fail_vcpu_run_mmio_no_syndrome_handler(int ret) events.fail_vcpu_runs += 1; } -typedef uint32_t aarch64_insn_t; +typedef u32 aarch64_insn_t; extern aarch64_insn_t __exec_test[2]; noinline void __return_0x77(void) @@ -525,7 +525,7 @@ noinline void __return_0x77(void) */ static void load_exec_code_for_test(struct kvm_vm *vm) { - uint64_t *code; + u64 *code; struct userspace_mem_region *region; void *hva; @@ -552,7 +552,7 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, static void setup_gva_maps(struct kvm_vm *vm) { struct userspace_mem_region *region; - uint64_t pte_gpa; + u64 pte_gpa; region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); /* Map TEST_GVA first. This will install a new PTE. */ @@ -574,12 +574,12 @@ enum pf_test_memslots { */ static void setup_memslots(struct kvm_vm *vm, struct test_params *p) { - uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); - uint64_t guest_page_size = vm->page_size; - uint64_t max_gfn = vm_compute_max_gfn(vm); + u64 backing_src_pagesz = get_backing_src_pagesz(p->src_type); + u64 guest_page_size = vm->page_size; + u64 max_gfn = vm_compute_max_gfn(vm); /* Enough for 2M of code when using 4K guest pages. */ - uint64_t code_npages = 512; - uint64_t pt_size, data_size, data_gpa; + u64 code_npages = 512; + u64 pt_size, data_size, data_gpa; /* * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using diff --git a/tools/testing/selftests/kvm/arm64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c index ab491ee9e5f7..e775faf20868 100644 --- a/tools/testing/selftests/kvm/arm64/psci_test.c +++ b/tools/testing/selftests/kvm/arm64/psci_test.c @@ -22,52 +22,50 @@ #define CPU_ON_ENTRY_ADDR 0xfeedf00dul #define CPU_ON_CONTEXT_ID 0xdeadc0deul -static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr, - uint64_t context_id) +static u64 psci_cpu_on(u64 target_cpu, u64 entry_addr, u64 context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, + do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id, 0, 0, 0, 0, &res); return res.a0; } -static uint64_t psci_affinity_info(uint64_t target_affinity, - uint64_t lowest_affinity_level) +static u64 psci_affinity_info(u64 target_affinity, u64 lowest_affinity_level) { struct arm_smccc_res res; - smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, + do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level, 0, 0, 0, 0, 0, &res); return res.a0; } -static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) +static u64 psci_system_suspend(u64 entry_addr, u64 context_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, + do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id, 0, 0, 0, 0, 0, &res); return res.a0; } -static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +static u64 psci_system_off2(u64 type, u64 cookie) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); return res.a0; } -static uint64_t psci_features(uint32_t func_id) +static u64 psci_features(u32 func_id) { struct arm_smccc_res res; - smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); + do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -89,12 +87,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, vm = vm_create(2); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *source = aarch64_vcpu_add(vm, 0, &init, guest_code); *target = aarch64_vcpu_add(vm, 1, &init, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -109,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { - uint64_t obs_pc, obs_x0; + u64 obs_pc, obs_x0; obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0])); @@ -122,9 +121,9 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu) obs_x0, CPU_ON_CONTEXT_ID); } -static void guest_test_cpu_on(uint64_t target_cpu) +static void guest_test_cpu_on(u64 target_cpu) { - uint64_t target_state; + u64 target_state; GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID)); @@ -141,7 +140,7 @@ static void guest_test_cpu_on(uint64_t target_cpu) static void host_test_cpu_on(void) { struct kvm_vcpu *source, *target; - uint64_t target_mpidr; + u64 target_mpidr; struct kvm_vm *vm; struct ucall uc; @@ -165,7 +164,7 @@ static void host_test_cpu_on(void) static void guest_test_system_suspend(void) { - uint64_t ret; + u64 ret; /* assert that SYSTEM_SUSPEND is discoverable */ GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND)); @@ -199,7 +198,7 @@ static void host_test_system_suspend(void) static void guest_test_system_off2(void) { - uint64_t ret; + u64 ret; /* assert that SYSTEM_OFF2 is discoverable */ GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & @@ -237,7 +236,7 @@ static void host_test_system_off2(void) { struct kvm_vcpu *source, *target; struct kvm_mp_state mps; - uint64_t psci_version = 0; + u64 psci_version = 0; int nr_shutdowns = 0; struct kvm_run *run; struct ucall uc; diff --git a/tools/testing/selftests/kvm/arm64/sea_to_user.c b/tools/testing/selftests/kvm/arm64/sea_to_user.c new file mode 100644 index 000000000000..e96d8982c28b --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/sea_to_user.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails + * to handle SEA and userspace has opt-ed in KVM_CAP_ARM_SEA_TO_USER. + * + * After reaching userspace with expected arm_sea info, also test userspace + * injecting a synchronous external data abort into the guest. + * + * This test utilizes EINJ to generate a REAL synchronous external data + * abort by consuming a recoverable uncorrectable memory error. Therefore + * the device under test must support EINJ in both firmware and host kernel, + * including the notrigger feature. Otherwise the test will be skipped. + * The under-test platform's APEI should be unable to claim SEA. Otherwise + * the test will also be skipped. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "guest_modes.h" + +#define PAGE_PRESENT (1ULL << 63) +#define PAGE_PHYSICAL 0x007fffffffffffffULL +#define PAGE_ADDR_MASK (~(0xfffULL)) + +/* Group ISV and ISS[23:14]. */ +#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \ + (ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \ + (ESR_ELx_SF) | (ESR_ELx_AR)) + +#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type" +#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1" +#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2" +#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" +#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" +#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" +/* Memory Uncorrectable non-fatal. */ +#define ERROR_TYPE_MEMORY_UER 0x10 +/* Memory address and mask valid (param1 and param2). */ +#define MASK_MEMORY_UER 0b10 + +/* Guest virtual address region = [2G, 3G). */ +#define START_GVA 0x80000000UL +#define VM_MEM_SIZE 0x40000000UL +/* Note: EINJ_OFFSET must < VM_MEM_SIZE. */ +#define EINJ_OFFSET 0x01234badUL +#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET)) + +static gpa_t einj_gpa; +static void *einj_hva; +static u64 einj_hpa; +static bool far_invalid; + +static u64 translate_hva_to_hpa(unsigned long hva) +{ + u64 pinfo; + s64 offset = hva / getpagesize() * sizeof(pinfo); + int fd; + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) + ksft_exit_fail_perror("Failed to open /proc/self/pagemap"); + if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) { + close(fd); + ksft_exit_fail_perror("Failed to read /proc/self/pagemap"); + } + + close(fd); + + if ((pinfo & PAGE_PRESENT) == 0) + ksft_exit_fail_perror("Page not present"); + + return ((pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT) + + (hva & (getpagesize() - 1)); +} + +static void write_einj_entry(const char *einj_path, u64 val) +{ + char cmd[256] = {0}; + FILE *cmdfile = NULL; + + sprintf(cmd, "echo %#lx > %s", val, einj_path); + cmdfile = popen(cmd, "r"); + + if (pclose(cmdfile) == 0) + ksft_print_msg("echo %#lx > %s - done\n", val, einj_path); + else + ksft_exit_fail_perror("Failed to write EINJ entry"); +} + +static void inject_uer(u64 hpa) +{ + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) + ksft_test_result_skip("EINJ table no available in firmware"); + + if (access(EINJ_ETYPE, R_OK | W_OK) == -1) + ksft_test_result_skip("EINJ module probably not loaded?"); + + write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER); + write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER); + write_einj_entry(EINJ_ADDR, hpa); + write_einj_entry(EINJ_MASK, ~0x0UL); + write_einj_entry(EINJ_NOTRIGGER, 1); + write_einj_entry(EINJ_DOIT, 1); +} + +/* + * When host APEI successfully claims the SEA caused by guest_code, kernel + * will send SIGBUS signal with BUS_MCEERR_AR to test thread. + * + * We set up this SIGBUS handler to skip the test for that case. + */ +static void sigbus_signal_handler(int sig, siginfo_t *si, void *v) +{ + ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig); + ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n", + si->si_signo, si->si_errno, si->si_code, si->si_addr); + if (si->si_code == BUS_MCEERR_AR) + ksft_test_result_skip("SEA is claimed by host APEI\n"); + else + ksft_test_result_fail("Exit with signal unhandled\n"); + + exit(0); +} + +static void setup_sigbus_handler(void) +{ + struct sigaction act; + + memset(&act, 0, sizeof(act)); + sigemptyset(&act.sa_mask); + act.sa_sigaction = sigbus_signal_handler; + act.sa_flags = SA_SIGINFO; + TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0, + "Failed to setup SIGBUS handler"); +} + +static void guest_code(void) +{ + u64 guest_data; + + /* Consumes error will cause a SEA. */ + guest_data = *(u64 *)EINJ_GVA; + + GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n", + EINJ_GVA, guest_data); +} + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + u64 far = read_sysreg(far_el1); + bool expect_far_invalid = far_invalid; + + GUEST_PRINTF("Handling Guest SEA\n"); + GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + if (expect_far_invalid) { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV); + GUEST_PRINTF("Guest observed garbage value in FAR\n"); + } else { + GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0); + GUEST_ASSERT_EQ(far, EINJ_GVA); + } + + GUEST_DONE(); +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + bool guest_done = false; + struct kvm_run *run = vcpu->run; + u64 esr; + + /* Resume the vCPU after error injection to consume the error. */ + vcpu_run(vcpu); + + ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n", + exit_reason_str(run->exit_reason)); + ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n", + run->arm_sea.esr, run->arm_sea.flags); + ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n", + run->arm_sea.gva, run->arm_sea.gpa); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA); + + esr = run->arm_sea.esr; + TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW); + TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0); + TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0); + TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0); + + if (!(esr & ESR_ELx_FnV)) { + ksft_print_msg("Expect gva to match given FnV bit is 0\n"); + TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA); + } + + if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) { + ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n"); + TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK); + } + + far_invalid = esr & ESR_ELx_FnV; + + /* Inject a SEA into guest and expect handled in SEA handler. */ + vcpu_inject_sea(vcpu); + + /* Expect the guest to reach GUEST_DONE gracefully. */ + do { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_PRINTF: + ksft_print_msg("From guest: %s", uc.buffer); + break; + case UCALL_DONE: + ksft_print_msg("Guest done gracefully!\n"); + guest_done = 1; + break; + case UCALL_ABORT: + ksft_print_msg("Guest aborted!\n"); + guest_done = 1; + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + } + } while (!guest_done); +} + +static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu) +{ + size_t backing_page_size; + size_t guest_page_size; + size_t alignment; + u64 num_guest_pages; + gpa_t start_gpa; + enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB; + struct kvm_vm *vm; + + backing_page_size = get_backing_src_pagesz(src_type); + guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + alignment = max(backing_page_size, guest_page_size); + num_guest_pages = VM_MEM_SIZE / guest_page_size; + + vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + + vm_install_sync_handler(vm, + /*vector=*/VECTOR_SYNC_CURRENT, + /*ec=*/ESR_ELx_EC_DABT_CUR, + /*handler=*/expect_sea_handler); + + start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size; + start_gpa = align_down(start_gpa, alignment); + + vm_userspace_mem_region_add( + /*vm=*/vm, + /*src_type=*/src_type, + /*gpa=*/start_gpa, + /*slot=*/1, + /*npages=*/num_guest_pages, + /*flags=*/0); + + virt_map(vm, START_GVA, start_gpa, num_guest_pages); + + ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n", + num_guest_pages, START_GVA, start_gpa); + return vm; +} + +static void vm_inject_memory_uer(struct kvm_vm *vm) +{ + u64 guest_data; + + einj_gpa = addr_gva2gpa(vm, EINJ_GVA); + einj_hva = addr_gva2hva(vm, EINJ_GVA); + + /* Populate certain data before injecting UER. */ + *(u64 *)einj_hva = 0xBAADCAFE; + guest_data = *(u64 *)einj_hva; + ksft_print_msg("Before EINJect: data=%#lx\n", + guest_data); + + einj_hpa = translate_hva_to_hpa((unsigned long)einj_hva); + + ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n", + EINJ_GVA, einj_gpa, einj_hva, einj_hpa); + + inject_uer(einj_hpa); + ksft_print_msg("Memory UER EINJected\n"); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER)); + + setup_sigbus_handler(); + + vm = vm_create_with_sea_handler(&vcpu); + vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0); + vm_inject_memory_uer(vm); + run_vm(vm, vcpu); + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 217541fe6536..7429a1055df5 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -30,21 +30,24 @@ struct reg_ftr_bits { char *name; bool sign; enum ftr_type type; - uint8_t shift; - uint64_t mask; + u8 shift; + u64 mask; /* * For FTR_EXACT, safe_val is used as the exact safe value. * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value. */ - int64_t safe_val; + s64 safe_val; + + /* Allowed to be changed by the host after run */ + bool mutable; }; struct test_feature_reg { - uint32_t reg; + u32 reg; const struct reg_ftr_bits *ftr_bits; }; -#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \ +#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL, MUT) \ { \ .name = #NAME, \ .sign = SIGNED, \ @@ -52,15 +55,20 @@ struct test_feature_reg { .shift = SHIFT, \ .mask = MASK, \ .safe_val = SAFE_VAL, \ + .mutable = MUT, \ } #define REG_FTR_BITS(type, reg, field, safe_val) \ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ - reg##_##field##_MASK, safe_val) + reg##_##field##_MASK, safe_val, false) + +#define REG_FTR_BITS_MUTABLE(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val, true) #define S_REG_FTR_BITS(type, reg, field, safe_val) \ __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \ - reg##_##field##_MASK, safe_val) + reg##_##field##_MASK, safe_val, false) #define REG_FTR_END \ { \ @@ -91,7 +99,6 @@ static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0), @@ -123,20 +130,30 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSUI, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), - REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + /* GICv3 support will be forced at run time if available */ + REG_FTR_BITS_MUTABLE(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), REG_FTR_END, }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -146,6 +163,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1), + REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), @@ -159,7 +179,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0), REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), @@ -182,6 +204,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -207,11 +237,13 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -224,20 +256,30 @@ static void guest_code(void) GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR3_EL1); GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + GUEST_REG_SYNC(SYS_MPIDR_EL1); + GUEST_REG_SYNC(SYS_CLIDR_EL1); GUEST_REG_SYNC(SYS_CTR_EL0); + GUEST_REG_SYNC(SYS_MIDR_EL1); + GUEST_REG_SYNC(SYS_REVIDR_EL1); + GUEST_REG_SYNC(SYS_AIDR_EL1); GUEST_DONE(); } /* Return a safe value to a given ftr_bits an ftr value */ -uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +u64 get_safe_value(const struct reg_ftr_bits *ftr_bits, u64 ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + u64 ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { @@ -287,14 +329,16 @@ uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) } /* Return an invalid value to a given ftr_bits an ftr value */ -uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +u64 get_invalid_value(const struct reg_ftr_bits *ftr_bits, u64 ftr) { - uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + u64 ftr_max = ftr_bits->mask >> ftr_bits->shift; + + TEST_ASSERT(ftr_max > 1, "This test doesn't support single bit features"); if (ftr_bits->sign == FTR_UNSIGNED) { switch (ftr_bits->type) { case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1); break; case FTR_LOWER_SAFE: ftr++; @@ -314,7 +358,7 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) } else if (ftr != ftr_max) { switch (ftr_bits->type) { case FTR_EXACT: - ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + ftr = max((u64)ftr_bits->safe_val + 1, ftr + 1); break; case FTR_LOWER_SAFE: ftr++; @@ -338,12 +382,12 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) return ftr; } -static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) +static u64 test_reg_set_success(struct kvm_vcpu *vcpu, u64 reg, + const struct reg_ftr_bits *ftr_bits) { - uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, new_val, ftr; + u8 shift = ftr_bits->shift; + u64 mask = ftr_bits->mask; + u64 val, new_val, ftr; val = vcpu_get_reg(vcpu, reg); ftr = (val & mask) >> shift; @@ -361,12 +405,12 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, return new_val; } -static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, +static void test_reg_set_fail(struct kvm_vcpu *vcpu, u64 reg, const struct reg_ftr_bits *ftr_bits) { - uint8_t shift = ftr_bits->shift; - uint64_t mask = ftr_bits->mask; - uint64_t val, old_val, ftr; + u8 shift = ftr_bits->shift; + u64 mask = ftr_bits->mask; + u64 val, old_val, ftr; int r; val = vcpu_get_reg(vcpu, reg); @@ -387,7 +431,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT_EQ(val, old_val); } -static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; +static u64 test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; #define encoding_to_range_idx(encoding) \ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ @@ -397,7 +441,7 @@ static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) { - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { .addr = (__u64)masks, }; @@ -414,8 +458,8 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; - uint32_t reg_id = test_regs[i].reg; - uint64_t reg = KVM_ARM64_SYS_REG(reg_id); + u32 reg_id = test_regs[i].reg; + u64 reg = KVM_ARM64_SYS_REG(reg_id); int idx; /* Get the index to masks array for the idreg */ @@ -445,11 +489,11 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) #define MPAM_IDREG_TEST 6 static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) { - uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { .addr = (__u64)masks, }; - uint64_t val; + u64 val; int idx, err; /* @@ -537,12 +581,101 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); } +#define MTE_IDREG_TEST 1 +static void test_user_set_mte_reg(struct kvm_vcpu *vcpu) +{ + u64 masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + u64 val; + u64 mte; + u64 mte_frac; + int idx, err; + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val); + if (!mte) { + ksft_test_result_skip("MTE capability not supported, nothing to test\n"); + return; + } + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n"); + return; + } + + /* + * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2) + * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported + * and MTE_frac == 0 indicates it is supported. + * + * As MTE_frac was previously unconditionally read as 0, check + * that the set to 0 succeeds but does not change MTE_frac + * from unsupported (0xF) to supported (0). + * + */ + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte != ID_AA64PFR1_EL1_MTE_MTE2 || + mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) { + ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n"); + return; + } + + /* Try to set MTE_frac=0. */ + val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) { + ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n"); + return; + } + + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val); + if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI) + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n"); + else + ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n"); +} + +static u64 reset_mutable_bits(u32 id, u64 val) +{ + struct test_feature_reg *reg = NULL; + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { + if (test_regs[i].reg == id) { + reg = &test_regs[i]; + break; + } + } + + if (!reg) + return val; + + for (const struct reg_ftr_bits *bits = reg->ftr_bits; bits->type != FTR_END; bits++) { + if (bits->mutable) { + val &= ~bits->mask; + val |= bits->safe_val << bits->shift; + } + } + + return val; +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; struct ucall uc; while (!done) { + u64 val; + vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { @@ -550,9 +683,11 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); break; case UCALL_SYNC: + val = test_reg_vals[encoding_to_range_idx(uc.args[2])]; + val = reset_mutable_bits(uc.args[2], val); + /* Make sure the written values are seen by guest */ - TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], - uc.args[3]); + TEST_ASSERT_EQ(val, reset_mutable_bits(uc.args[2], uc.args[3])); break; case UCALL_DONE: done = true; @@ -572,13 +707,13 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) static void test_clidr(struct kvm_vcpu *vcpu) { - uint64_t clidr; + u64 clidr; int level; clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1)); /* find the first empty level in the cache hierarchy */ - for (level = 1; level < 7; level++) { + for (level = 1; level <= 7; level++) { if (!CLIDR_CTYPE(clidr, level)) break; } @@ -609,28 +744,42 @@ static void test_ctr(struct kvm_vcpu *vcpu) test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr; } -static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +static void test_id_reg(struct kvm_vcpu *vcpu, u32 id) { u64 val; + val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id)); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val); + test_reg_vals[encoding_to_range_idx(id)] = val; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ test_clidr(vcpu); test_ctr(vcpu); - val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1)); - val++; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + test_id_reg(vcpu, SYS_MPIDR_EL1); + ksft_test_result_pass("%s\n", __func__); +} + +static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + test_id_reg(vcpu, SYS_MIDR_EL1); + test_id_reg(vcpu, SYS_REVIDR_EL1); + test_id_reg(vcpu, SYS_AIDR_EL1); - test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; ksft_test_result_pass("%s\n", __func__); } -static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, u32 encoding) { size_t idx = encoding_to_range_idx(encoding); - uint64_t observed; + u64 observed; observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding)); - TEST_ASSERT_EQ(test_reg_vals[idx], observed); + TEST_ASSERT_EQ(reset_mutable_bits(encoding, test_reg_vals[idx]), + reset_mutable_bits(encoding, observed)); } static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) @@ -647,6 +796,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0); + test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1); + test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1); ksft_test_result_pass("%s\n", __func__); } @@ -656,33 +808,38 @@ int main(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; - uint64_t val, el0; - int test_cnt; + u64 val, el0; + int test_cnt, i, j; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS)); + + test_wants_mte(); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0); + vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); /* Check for AARCH64 only system */ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + el0 = FIELD_GET(ID_AA64PFR0_EL1_EL0, val); aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP); ksft_print_header(); - test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + - MPAM_IDREG_TEST; + test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; + for (i = 0; i < ARRAY_SIZE(test_regs); i++) + for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++) + test_cnt++; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_vcpu_non_ftr_id_regs(vcpu); test_user_set_mpam_reg(vcpu); + test_user_set_mte_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/arm64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c index 2d189f3da228..21e41880261b 100644 --- a/tools/testing/selftests/kvm/arm64/smccc_filter.c +++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c @@ -22,10 +22,22 @@ enum smccc_conduit { SMC_INSN, }; +static bool test_runs_at_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + struct kvm_vcpu_init init; + + kvm_get_default_vcpu_target(vm, &init); + kvm_vm_free(vm); + + return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + #define for_each_conduit(conduit) \ - for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++) + for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \ + conduit <= SMC_INSN; conduit++) -static void guest_main(uint32_t func_id, enum smccc_conduit conduit) +static void guest_main(u32 func_id, enum smccc_conduit conduit) { struct arm_smccc_res res; @@ -37,7 +49,7 @@ static void guest_main(uint32_t func_id, enum smccc_conduit conduit) GUEST_SYNC(res.a0); } -static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, +static int __set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions, enum kvm_smccc_filter_action action) { struct kvm_smccc_filter filter = { @@ -50,7 +62,7 @@ static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_fun KVM_ARM_VM_SMCCC_FILTER, &filter); } -static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions, +static void set_smccc_filter(struct kvm_vm *vm, u32 start, u32 nr_functions, enum kvm_smccc_filter_action action) { int ret = __set_smccc_filter(vm, start, nr_functions, action); @@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) struct kvm_vm *vm; vm = vm_create(1); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vm, &init); /* * Enable in-kernel emulation of PSCI to ensure that calls are denied @@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu) init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main); + kvm_arch_vm_finalize_vcpus(vm); return vm; } @@ -99,7 +112,7 @@ static void test_filter_reserved_range(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm = setup_vm(&vcpu); - uint32_t smc64_fn; + u32 smc64_fn; int r; r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1, @@ -204,7 +217,7 @@ static void test_filter_denied(void) } } -static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id, +static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, u32 func_id, enum smccc_conduit conduit) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..47e34b43afb2 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,16 +9,17 @@ #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 -#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) - -#define GICR_TYPER 0x8 +#define REG_OFFSET(vcpu, offset) (((u64)vcpu << 32) | offset) #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -26,10 +27,10 @@ struct vm_gic { struct kvm_vm *vm; int gic_fd; - uint32_t gic_dev_type; + u32 gic_dev_type; }; -static uint64_t max_phys_size; +static u64 max_phys_size; /* * Helpers to access a redistributor register and verify the ioctl() failed or @@ -38,17 +39,17 @@ static uint64_t max_phys_size; static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset, int want, const char *msg) { - uint32_t ignored_val; + u32 ignored_val; int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, REG_OFFSET(vcpu, offset), &ignored_val); TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want); } -static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want, +static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, u32 want, const char *msg) { - uint32_t val; + u32 val; kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, REG_OFFSET(vcpu, offset), &val); @@ -70,8 +71,8 @@ static int run_vcpu(struct kvm_vcpu *vcpu) return __vcpu_run(vcpu) ? -errno : 0; } -static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, - uint32_t nr_vcpus, +static struct vm_gic vm_gic_create_with_vcpus(u32 gic_dev_type, + u32 nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vm_gic v; @@ -83,7 +84,7 @@ static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, return v; } -static struct vm_gic vm_gic_create_barebones(uint32_t gic_dev_type) +static struct vm_gic vm_gic_create_barebones(u32 gic_dev_type) { struct vm_gic v; @@ -102,9 +103,9 @@ static void vm_gic_destroy(struct vm_gic *v) } struct vgic_region_attr { - uint64_t attr; - uint64_t size; - uint64_t alignment; + u64 attr; + u64 size; + u64 alignment; }; struct vgic_region_attr gic_v3_dist_region = { @@ -142,7 +143,7 @@ struct vgic_region_attr gic_v2_cpu_region = { static void subtest_dist_rdist(struct vm_gic *v) { int ret; - uint64_t addr; + u64 addr; struct vgic_region_attr rdist; /* CPU interface in GICv2*/ struct vgic_region_attr dist; @@ -222,7 +223,7 @@ static void subtest_dist_rdist(struct vm_gic *v) /* Test the new REDIST region API */ static void subtest_v3_redist_regions(struct vm_gic *v) { - uint64_t addr, expected_addr; + u64 addr, expected_addr; int ret; ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, @@ -331,7 +332,7 @@ static void subtest_v3_redist_regions(struct vm_gic *v) * VGIC KVM device is created and initialized before the secondary CPUs * get created */ -static void test_vgic_then_vcpus(uint32_t gic_dev_type) +static void test_vgic_then_vcpus(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; @@ -352,7 +353,7 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type) } /* All the VCPUs are created before the VGIC KVM device gets initialized */ -static void test_vcpus_then_vgic(uint32_t gic_dev_type) +static void test_vcpus_then_vgic(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; @@ -407,7 +408,7 @@ static void test_v3_new_redist_regions(void) struct kvm_vcpu *vcpus[NR_VCPUS]; void *dummy = NULL; struct vm_gic v; - uint64_t addr; + u64 addr; int ret; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); @@ -459,7 +460,7 @@ static void test_v3_new_redist_regions(void) static void test_v3_typer_accesses(void) { struct vm_gic v; - uint64_t addr; + u64 addr; int ret, i; v.vm = vm_create(NR_VCPUS); @@ -517,7 +518,7 @@ static void test_v3_typer_accesses(void) } static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, - uint32_t vcpuids[]) + u32 vcpuids[]) { struct vm_gic v; int i; @@ -543,9 +544,9 @@ static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, */ static void test_v3_last_bit_redist_regions(void) { - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; - uint64_t addr; + u64 addr; v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); @@ -577,9 +578,9 @@ static void test_v3_last_bit_redist_regions(void) /* Test last bit with legacy region */ static void test_v3_last_bit_single_rdist(void) { - uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; + u32 vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; - uint64_t addr; + u64 addr; v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); @@ -605,7 +606,7 @@ static void test_v3_redist_ipa_range_check_at_vcpu_run(void) struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; int ret, i; - uint64_t addr; + u64 addr; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus); @@ -637,7 +638,7 @@ static void test_v3_its_region(void) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; - uint64_t addr; + u64 addr; int its_fd, ret; v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); @@ -675,14 +676,52 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ -int test_kvm_device(uint32_t gic_dev_type) +int test_kvm_device(u32 gic_dev_type) { struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; - uint32_t other; + u32 other; int ret; v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); @@ -715,7 +754,221 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } -void run_tests(uint32_t gic_dev_type) +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... + */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + kvm_vm_free(vm); +} + +void run_tests(u32 gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); test_vgic_then_vcpus(gic_dev_type); @@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } @@ -739,6 +994,8 @@ int main(int ac, char **av) int pa_bits; int cnt_impl = 0; + test_disable_default_vgic(); + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index f4ac28d53747..5e231998617e 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -24,11 +24,12 @@ * function. */ struct test_args { - uint32_t nr_irqs; /* number of KVM supported IRQs. */ + u32 nr_irqs; /* number of KVM supported IRQs. */ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */ bool level_sensitive; /* 1 is level, 0 is edge */ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */ + u32 shared_data; }; /* @@ -63,15 +64,15 @@ typedef enum { struct kvm_inject_args { kvm_inject_cmd cmd; - uint32_t first_intid; - uint32_t num; + u32 first_intid; + u32 num; int level; bool expect_failure; }; /* Used on the guest side to perform the hypercall. */ -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure); +static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid, + u32 num, int level, bool expect_failure); /* Used on the host side to get the hypercall info. */ static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, @@ -132,8 +133,8 @@ static struct kvm_inject_desc set_active_fns[] = { for_each_supported_inject_fn((args), (t), (f)) /* Shared between the guest main thread and the IRQ handlers. */ -volatile uint64_t irq_handled; -volatile uint32_t irqnr_received[MAX_SPI + 1]; +volatile u64 irq_handled; +volatile u32 irqnr_received[MAX_SPI + 1]; static void reset_stats(void) { @@ -144,25 +145,25 @@ static void reset_stats(void) irqnr_received[i] = 0; } -static uint64_t gic_read_ap1r0(void) +static u64 gic_read_ap1r0(void) { - uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); + u64 reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); dsb(sy); return reg; } -static void gic_write_ap1r0(uint64_t val) +static void gic_write_ap1r0(u64 val) { write_sysreg_s(val, SYS_ICC_AP1R0_EL1); isb(); } -static void guest_set_irq_line(uint32_t intid, uint32_t level); +static void guest_set_irq_line(u32 intid, u32 level); static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) { - uint32_t intid = gic_get_and_ack_irq(); + u32 intid = gic_get_and_ack_irq(); if (intid == IAR_SPURIOUS) return; @@ -188,8 +189,8 @@ static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive) GUEST_ASSERT(!gic_irq_get_pending(intid)); } -static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, - uint32_t num, int level, bool expect_failure) +static void kvm_inject_call(kvm_inject_cmd cmd, u32 first_intid, + u32 num, int level, bool expect_failure) { struct kvm_inject_args args = { .cmd = cmd, @@ -203,9 +204,9 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid, #define GUEST_ASSERT_IAR_EMPTY() \ do { \ - uint32_t _intid; \ + u32 _intid; \ _intid = gic_get_and_ack_irq(); \ - GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \ + GUEST_ASSERT(_intid == IAR_SPURIOUS); \ } while (0) #define CAT_HELPER(a, b) a ## b @@ -236,13 +237,13 @@ static void reset_priorities(struct test_args *args) gic_set_priority(i, IRQ_DEFAULT_PRIO_REG); } -static void guest_set_irq_line(uint32_t intid, uint32_t level) +static void guest_set_irq_line(u32 intid, u32 level) { kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false); } static void test_inject_fail(struct test_args *args, - uint32_t intid, kvm_inject_cmd cmd) + u32 intid, kvm_inject_cmd cmd) { reset_stats(); @@ -254,10 +255,10 @@ static void test_inject_fail(struct test_args *args, } static void guest_inject(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) + u32 first_intid, u32 num, + kvm_inject_cmd cmd) { - uint32_t i; + u32 i; reset_stats(); @@ -291,10 +292,10 @@ static void guest_inject(struct test_args *args, * deactivated yet. */ static void guest_restore_active(struct test_args *args, - uint32_t first_intid, uint32_t num, - kvm_inject_cmd cmd) + u32 first_intid, u32 num, + kvm_inject_cmd cmd) { - uint32_t prio, intid, ap1r; + u32 prio, intid, ap1r; int i; /* @@ -341,9 +342,9 @@ static void guest_restore_active(struct test_args *args, * This function should only be used in test_inject_preemption (with IRQs * masked). */ -static uint32_t wait_for_and_activate_irq(void) +static u32 wait_for_and_activate_irq(void) { - uint32_t intid; + u32 intid; do { asm volatile("wfi" : : : "memory"); @@ -359,10 +360,11 @@ static uint32_t wait_for_and_activate_irq(void) * interrupts for the whole test. */ static void test_inject_preemption(struct test_args *args, - uint32_t first_intid, int num, - kvm_inject_cmd cmd) + u32 first_intid, int num, + const unsigned long *exclude, + kvm_inject_cmd cmd) { - uint32_t intid, prio, step = KVM_PRIO_STEPS; + u32 intid, prio, step = KVM_PRIO_STEPS; int i; /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs @@ -377,8 +379,12 @@ static void test_inject_preemption(struct test_args *args, local_irq_disable(); for (i = 0; i < num; i++) { - uint32_t tmp; + u32 tmp; intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + KVM_INJECT(cmd, intid); /* Each successive IRQ will preempt the previous one. */ tmp = wait_for_and_activate_irq(); @@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args, /* finish handling the IRQs starting with the highest priority one. */ for (i = 0; i < num; i++) { intid = num - i - 1 + first_intid; + + if (exclude && test_bit(intid - first_intid, exclude)) + continue; + gic_set_eoi(intid); - if (args->eoi_split) - gic_set_dir(intid); + } + + if (args->eoi_split) { + for (i = 0; i < num; i++) { + intid = i + first_intid; + + if (exclude && test_bit(i, exclude)) + continue; + + if (args->eoi_split) + gic_set_dir(intid); + } } local_irq_enable(); - for (i = 0; i < num; i++) + for (i = 0; i < num; i++) { + if (exclude && test_bit(i, exclude)) + continue; + GUEST_ASSERT(!gic_irq_get_active(i + first_intid)); + } GUEST_ASSERT_EQ(gic_read_ap1r0(), 0); GUEST_ASSERT_IAR_EMPTY(); @@ -407,7 +431,7 @@ static void test_inject_preemption(struct test_args *args, static void test_injection(struct test_args *args, struct kvm_inject_desc *f) { - uint32_t nr_irqs = args->nr_irqs; + u32 nr_irqs = args->nr_irqs; if (f->sgi) { guest_inject(args, MIN_SGI, 1, f->cmd); @@ -427,7 +451,7 @@ static void test_injection(struct test_args *args, struct kvm_inject_desc *f) static void test_injection_failure(struct test_args *args, struct kvm_inject_desc *f) { - uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; + u32 bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, }; int i; for (i = 0; i < ARRAY_SIZE(bad_intid); i++) @@ -436,49 +460,48 @@ static void test_injection_failure(struct test_args *args, static void test_preemption(struct test_args *args, struct kvm_inject_desc *f) { - /* - * Test up to 4 levels of preemption. The reason is that KVM doesn't - * currently implement the ability to have more than the number-of-LRs - * number of concurrently active IRQs. The number of LRs implemented is - * IMPLEMENTATION DEFINED, however, it seems that most implement 4. - */ + /* Timer PPIs cannot be injected from userspace */ + static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) | + BIT(30 - MIN_PPI) | + BIT(28 - MIN_PPI) | + BIT(26 - MIN_PPI)); + if (f->sgi) - test_inject_preemption(args, MIN_SGI, 4, f->cmd); + test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd); if (f->ppi) - test_inject_preemption(args, MIN_PPI, 4, f->cmd); + test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd); if (f->spi) - test_inject_preemption(args, MIN_SPI, 4, f->cmd); + test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd); } static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f) { - /* Test up to 4 active IRQs. Same reason as in test_preemption. */ if (f->sgi) - guest_restore_active(args, MIN_SGI, 4, f->cmd); + guest_restore_active(args, MIN_SGI, 16, f->cmd); if (f->ppi) - guest_restore_active(args, MIN_PPI, 4, f->cmd); + guest_restore_active(args, MIN_PPI, 16, f->cmd); if (f->spi) - guest_restore_active(args, MIN_SPI, 4, f->cmd); + guest_restore_active(args, MIN_SPI, 31, f->cmd); } static void guest_code(struct test_args *args) { - uint32_t i, nr_irqs = args->nr_irqs; + u32 i, nr_irqs = args->nr_irqs; bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; gic_init(GIC_V3, 1); - for (i = 0; i < nr_irqs; i++) - gic_irq_enable(i); - for (i = MIN_SPI; i < nr_irqs; i++) gic_irq_set_config(i, !level_sensitive); + for (i = 0; i < nr_irqs; i++) + gic_irq_enable(i); + gic_set_eoi_split(args->eoi_split); reset_priorities(args); @@ -506,8 +529,8 @@ static void guest_code(struct test_args *args) GUEST_DONE(); } -static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, - struct test_args *test_args, bool expect_failure) +static void kvm_irq_line_check(struct kvm_vm *vm, u32 intid, int level, + struct test_args *test_args, bool expect_failure) { int ret; @@ -525,8 +548,8 @@ static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level, } } -void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, - bool expect_failure) +void kvm_irq_set_level_info_check(int gic_fd, u32 intid, int level, + bool expect_failure) { if (!expect_failure) { kvm_irq_set_level_info(gic_fd, intid, level); @@ -550,17 +573,18 @@ void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level, } static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) + u32 intid, u32 num, + u32 kvm_max_routes, + bool expect_failure) { struct kvm_irq_routing *routing; int ret; - uint64_t i; + u64 i; assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES); routing = kvm_gsi_routing_create(); - for (i = intid; i < (uint64_t)intid + num; i++) + for (i = intid; i < (u64)intid + num; i++) kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI); if (!expect_failure) { @@ -568,7 +592,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, } else { ret = _kvm_gsi_routing_write(vm, routing); /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */ - if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) + if (((u64)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS) TEST_ASSERT(ret != 0 && errno == EINVAL, "Bad intid %u did not cause KVM_SET_GSI_ROUTING " "error: rc: %i errno: %i", intid, ret, errno); @@ -579,7 +603,7 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, } } -static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, +static void kvm_irq_write_ispendr_check(int gic_fd, u32 intid, struct kvm_vcpu *vcpu, bool expect_failure) { @@ -595,13 +619,13 @@ static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, } static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, - uint32_t intid, uint32_t num, uint32_t kvm_max_routes, - bool expect_failure) + u32 intid, u32 num, u32 kvm_max_routes, + bool expect_failure) { int fd[MAX_SPI]; - uint64_t val; + u64 val; int ret, f; - uint64_t i; + u64 i; /* * There is no way to try injecting an SGI or PPI as the interface @@ -620,35 +644,29 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; - assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) { + assert(i <= (u64)UINT_MAX); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) { val = 1; - ret = write(fd[f], &val, sizeof(uint64_t)); - TEST_ASSERT(ret == sizeof(uint64_t), + ret = write(fd[f], &val, sizeof(u64)); + TEST_ASSERT(ret == sizeof(u64), __KVM_SYSCALL_ERROR("write()", ret)); } - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) - close(fd[f]); + for (f = 0, i = intid; i < (u64)intid + num; i++, f++) + kvm_close(fd[f]); } /* handles the valid case: intid=0xffffffff num=1 */ #define for_each_intid(first, num, tmp, i) \ for ((tmp) = (i) = (first); \ - (tmp) < (uint64_t)(first) + (uint64_t)(num); \ + (tmp) < (u64)(first) + (u64)(num); \ (tmp)++, (i)++) static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, @@ -656,13 +674,13 @@ static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, struct test_args *test_args) { kvm_inject_cmd cmd = inject_args->cmd; - uint32_t intid = inject_args->first_intid; - uint32_t num = inject_args->num; + u32 intid = inject_args->first_intid; + u32 num = inject_args->num; int level = inject_args->level; bool expect_failure = inject_args->expect_failure; struct kvm_vm *vm = vcpu->vm; - uint64_t tmp; - uint32_t i; + u64 tmp; + u32 i; /* handles the valid case: intid=0xffffffff num=1 */ assert(intid < UINT_MAX - num || num == 1); @@ -714,7 +732,7 @@ static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc, struct kvm_inject_args *args) { struct kvm_inject_args *kvm_args_hva; - vm_vaddr_t kvm_args_gva; + gva_t kvm_args_gva; kvm_args_gva = uc->args[1]; kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva); @@ -728,14 +746,14 @@ static void print_args(struct test_args *args) args->eoi_split); } -static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) +static void test_vgic(u32 nr_irqs, bool level_sensitive, bool eoi_split) { struct ucall uc; int gic_fd; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_inject_args inject_args; - vm_vaddr_t args_gva; + gva_t args_gva; struct test_args args = { .nr_irqs = nr_irqs, @@ -753,12 +771,11 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) vcpu_init_descriptor_tables(vcpu); /* Setup the guest args page (so it gets the args). */ - args_gva = vm_vaddr_alloc_page(vm); + args_gva = vm_alloc_page(vm); memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpu, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); @@ -786,6 +803,221 @@ done: kvm_vm_free(vm); } +static void guest_code_asym_dir(struct test_args *args, int cpuid) +{ + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + if (cpuid == 0) { + u32 intid; + + local_irq_disable(); + + gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO); + gic_irq_enable(MIN_SPI); + gic_irq_set_pending(MIN_SPI); + + intid = wait_for_and_activate_irq(); + GUEST_ASSERT_EQ(intid, MIN_SPI); + + gic_set_eoi(intid); + isb(); + + WRITE_ONCE(args->shared_data, MIN_SPI); + dsb(ishst); + + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) == MIN_SPI); + GUEST_ASSERT(!gic_irq_get_active(MIN_SPI)); + } else { + do { + dsb(ishld); + } while (READ_ONCE(args->shared_data) != MIN_SPI); + + gic_set_dir(MIN_SPI); + isb(); + + WRITE_ONCE(args->shared_data, 0); + dsb(ishst); + } + + GUEST_DONE(); +} + +static void guest_code_group_en(struct test_args *args, int cpuid) +{ + u32 intid; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(0); + gic_set_priority_mask(CPU_PRIO_MASK); + /* SGI0 is G0, which is disabled */ + gic_irq_set_group(0, 0); + + /* Configure all SGIs with decreasing priority */ + for (intid = 0; intid < MIN_PPI; intid++) { + gic_set_priority(intid, (intid + 1) * 8); + gic_irq_enable(intid); + gic_irq_set_pending(intid); + } + + /* Ack and EOI all G1 interrupts */ + for (int i = 1; i < MIN_PPI; i++) { + intid = wait_for_and_activate_irq(); + + GUEST_ASSERT(intid < MIN_PPI); + gic_set_eoi(intid); + isb(); + } + + /* + * Check that SGI0 is still pending, inactive, and that we cannot + * ack anything. + */ + GUEST_ASSERT(gic_irq_get_pending(0)); + GUEST_ASSERT(!gic_irq_get_active(0)); + GUEST_ASSERT_IAR_EMPTY(); + GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS); + + /* Open the G0 gates, and verify we can ack SGI0 */ + write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1); + isb(); + + do { + intid = read_sysreg_s(SYS_ICC_IAR0_EL1); + } while (intid == IAR_SPURIOUS); + + GUEST_ASSERT(intid == 0); + GUEST_DONE(); +} + +static void guest_code_timer_spi(struct test_args *args, int cpuid) +{ + u32 intid; + u64 val; + + gic_init(GIC_V3, 2); + + gic_set_eoi_split(1); + gic_set_priority_mask(CPU_PRIO_MASK); + + /* Add a pending SPI so that KVM starts trapping DIR */ + gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO); + gic_irq_set_pending(MIN_SPI + cpuid); + + /* Configure the timer with a higher priority, make it pending */ + gic_set_priority(27, IRQ_DEFAULT_PRIO - 8); + + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* Enable both interrupts */ + gic_irq_enable(MIN_SPI + cpuid); + gic_irq_enable(27); + + /* The timer must fire */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + /* Check that we can deassert it */ + write_sysreg(0, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(!gic_irq_get_pending(27)); + + /* + * Priority drop, deactivation -- we expect that the host + * deactivation will have been effective + */ + gic_set_eoi(27); + gic_set_dir(27); + + GUEST_ASSERT(!gic_irq_get_active(27)); + + /* Do it one more time */ + isb(); + val = read_sysreg(cntvct_el0); + write_sysreg(val, cntv_cval_el0); + write_sysreg(1, cntv_ctl_el0); + isb(); + + GUEST_ASSERT(gic_irq_get_pending(27)); + + /* The timer must fire again */ + intid = wait_for_and_activate_irq(); + GUEST_ASSERT(intid == 27); + + GUEST_DONE(); +} + +static void *test_vcpu_run(void *arg) +{ + struct kvm_vcpu *vcpu = arg; + struct ucall uc; + + while (1) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + + return NULL; +} + +static void test_vgic_two_cpus(void *gcode) +{ + pthread_t thr[2]; + struct kvm_vcpu *vcpus[2]; + struct test_args args = {}; + struct kvm_vm *vm; + gva_t args_gva; + int gic_fd, ret; + + vm = vm_create_with_vcpus(2, gcode, vcpus); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpus[0]); + vcpu_init_descriptor_tables(vcpus[1]); + + /* Setup the guest args page (so it gets the args). */ + args_gva = vm_alloc_page(vm); + memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); + vcpu_args_set(vcpus[0], 2, args_gva, 0); + vcpu_args_set(vcpus[1], 2, args_gva, 1); + + gic_fd = vgic_v3_setup(vm, 2, 64); + + ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret); + ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]); + if (ret) + TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret); + + pthread_join(thr[0], NULL); + pthread_join(thr[1], NULL); + + close(gic_fd); + kvm_vm_free(vm); +} + static void help(const char *name) { printf( @@ -802,12 +1034,15 @@ static void help(const char *name) int main(int argc, char **argv) { - uint32_t nr_irqs = 64; + u32 nr_irqs = 64; bool default_args = true; bool level_sensitive = false; int opt; bool eoi_split = false; + TEST_REQUIRE(kvm_supports_vgic_v3()); + test_disable_default_vgic(); + while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) { switch (opt) { case 'n': @@ -839,6 +1074,9 @@ int main(int argc, char **argv) test_vgic(nr_irqs, false /* level */, true /* eoi_split */); test_vgic(nr_irqs, true /* level */, false /* eoi_split */); test_vgic(nr_irqs, true /* level */, true /* eoi_split */); + test_vgic_two_cpus(guest_code_asym_dir); + test_vgic_two_cpus(guest_code_group_en); + test_vgic_two_cpus(guest_code_timer_spi); } else { test_vgic(nr_irqs, level_sensitive, eoi_split); } diff --git a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c index fc4fe52fb6f8..d64d434d3f06 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c +++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c @@ -23,11 +23,11 @@ #define GIC_LPI_OFFSET 8192 static size_t nr_iterations = 1000; -static vm_paddr_t gpa_base; +static gpa_t gpa_base; static struct kvm_vm *vm; static struct kvm_vcpu **vcpus; -static int gic_fd, its_fd; +static int its_fd; static struct test_data { bool request_vcpus_stop; @@ -35,14 +35,14 @@ static struct test_data { u32 nr_devices; u32 nr_event_ids; - vm_paddr_t device_table; - vm_paddr_t collection_table; - vm_paddr_t cmdq_base; + gpa_t device_table; + gpa_t collection_table; + gpa_t cmdq_base; void *cmdq_base_va; - vm_paddr_t itt_tables; + gpa_t itt_tables; - vm_paddr_t lpi_prop_table; - vm_paddr_t lpi_pend_tables; + gpa_t lpi_prop_table; + gpa_t lpi_pend_tables; } test_data = { .nr_cpus = 1, .nr_devices = 1, @@ -73,7 +73,7 @@ static void guest_setup_its_mappings(void) /* Round-robin the LPIs to all of the vCPUs in the VM */ coll_id = 0; for (device_id = 0; device_id < nr_devices; device_id++) { - vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + gpa_t itt_base = test_data.itt_tables + (device_id * SZ_64K); its_send_mapd_cmd(test_data.cmdq_base_va, device_id, itt_base, SZ_64K, true); @@ -118,11 +118,16 @@ static void guest_setup_gic(void) guest_setup_its_mappings(); guest_invalidate_all_rdists(); + + /* SYNC to ensure ITS setup is complete */ + for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++) + its_send_sync_cmd(test_data.cmdq_base_va, cpuid); } static void guest_code(size_t nr_lpis) { guest_setup_gic(); + local_irq_enable(); GUEST_SYNC(0); @@ -183,7 +188,7 @@ static void setup_test_data(void) size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); u32 nr_devices = test_data.nr_devices; u32 nr_cpus = test_data.nr_cpus; - vm_paddr_t cmdq_base; + gpa_t cmdq_base; test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, @@ -214,15 +219,12 @@ static void setup_test_data(void) static void setup_gic(void) { - gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); - __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); - its_fd = vgic_its_setup(vm); } static void signal_lpi(u32 device_id, u32 event_id) { - vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + gpa_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; struct kvm_msi msi = { .address_lo = db_addr, @@ -334,7 +336,7 @@ static void setup_vm(void) { int i; - vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu *)); TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); @@ -355,7 +357,6 @@ static void setup_vm(void) static void destroy_vm(void) { close(its_fd); - close(gic_fd); kvm_vm_free(vm); free(vcpus); } @@ -374,6 +375,8 @@ int main(int argc, char **argv) u32 nr_threads; int c; + TEST_REQUIRE(kvm_supports_vgic_v3()); + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { switch (c) { case 'v': diff --git a/tools/testing/selftests/kvm/arm64/vgic_v5.c b/tools/testing/selftests/kvm/arm64/vgic_v5.c new file mode 100644 index 000000000000..d785b660d847 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/vgic_v5.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kernel.h> +#include <sys/syscall.h> +#include <asm/kvm.h> +#include <asm/kvm_para.h> + +#include <arm64/gic_v5.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vgic.h" + +#define NR_VCPUS 1 + +struct vm_gic { + struct kvm_vm *vm; + int gic_fd; + u32 gic_dev_type; +}; + +static u64 max_phys_size; + +#define GUEST_CMD_IRQ_CDIA 10 +#define GUEST_CMD_IRQ_DIEOI 11 +#define GUEST_CMD_IS_AWAKE 12 +#define GUEST_CMD_IS_READY 13 + +static void guest_irq_handler(struct ex_regs *regs) +{ + bool valid; + u32 hwirq; + u64 ia; + static int count; + + /* + * We have pending interrupts. Should never actually enter WFI + * here! + */ + wfi(); + GUEST_SYNC(GUEST_CMD_IS_AWAKE); + + ia = gicr_insn(CDIA); + valid = GICV5_GICR_CDIA_VALID(ia); + + GUEST_SYNC(GUEST_CMD_IRQ_CDIA); + + if (!valid) + return; + + gsb_ack(); + isb(); + + hwirq = FIELD_GET(GICV5_GICR_CDIA_INTID, ia); + + gic_insn(hwirq, CDDI); + gic_insn(0, CDEOI); + + GUEST_SYNC(GUEST_CMD_IRQ_DIEOI); + + if (++count >= 2) + GUEST_DONE(); + + /* Ask for the next interrupt to be injected */ + GUEST_SYNC(GUEST_CMD_IS_READY); +} + +static void guest_code(void) +{ + local_irq_disable(); + + gicv5_cpu_enable_interrupts(); + local_irq_enable(); + + /* Enable the SW_PPI (3) */ + write_sysreg_s(BIT_ULL(3), SYS_ICC_PPI_ENABLER0_EL1); + + /* Ask for the first interrupt to be injected */ + GUEST_SYNC(GUEST_CMD_IS_READY); + + /* Loop forever waiting for interrupts */ + while (1); +} + + +/* we don't want to assert on run execution, hence that helper */ +static int run_vcpu(struct kvm_vcpu *vcpu) +{ + return __vcpu_run(vcpu) ? -errno : 0; +} + +static void vm_gic_destroy(struct vm_gic *v) +{ + close(v->gic_fd); + kvm_vm_free(v->vm); +} + +static void test_vgic_v5_ppis(u32 gic_dev_type) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct ucall uc; + u64 user_ppis[2]; + struct vm_gic v; + int ret, i; + + v.gic_dev_type = gic_dev_type; + v.vm = __vm_create(VM_SHAPE_DEFAULT, NR_VCPUS, 0); + + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + for (i = 0; i < NR_VCPUS; i++) + vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); + + vm_init_descriptor_tables(v.vm); + vm_install_exception_handler(v.vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + for (i = 0; i < NR_VCPUS; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + /* Read out the PPIs that user space is allowed to drive. */ + kvm_device_attr_get(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_USERSPACE_PPIS, &user_ppis); + + /* We should always be able to drive the SW_PPI. */ + TEST_ASSERT(user_ppis[0] & BIT(GICV5_ARCH_PPI_SW_PPI), + "SW_PPI is not drivable by userspace"); + + while (1) { + ret = run_vcpu(vcpus[0]); + + switch (get_ucall(vcpus[0], &uc)) { + case UCALL_SYNC: + /* + * The guest is ready for the next level change. Set + * high if ready, and lower if it has been consumed. + */ + if (uc.args[1] == GUEST_CMD_IS_READY || + uc.args[1] == GUEST_CMD_IRQ_DIEOI) { + u64 irq; + bool level = uc.args[1] == GUEST_CMD_IRQ_DIEOI ? 0 : 1; + + irq = FIELD_PREP(KVM_ARM_IRQ_NUM_MASK, 3); + irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT; + + _kvm_irq_line(v.vm, irq, level); + } else if (uc.args[1] == GUEST_CMD_IS_AWAKE) { + pr_info("Guest skipping WFI due to pending IRQ\n"); + } else if (uc.args[1] == GUEST_CMD_IRQ_CDIA) { + pr_info("Guest acknowledged IRQ\n"); + } + + continue; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + TEST_ASSERT(ret == 0, "Failed to test GICv5 PPIs"); + + vm_gic_destroy(&v); +} + +/* + * Returns 0 if it's possible to create GIC device of a given type (V5). + */ +int test_kvm_device(u32 gic_dev_type) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct vm_gic v; + int ret; + + v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); + + /* try to create a non existing KVM device */ + ret = __kvm_test_create_device(v.vm, 0); + TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); + + /* trial mode */ + ret = __kvm_test_create_device(v.vm, gic_dev_type); + if (ret) + return ret; + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); + + ret = __kvm_create_device(v.vm, gic_dev_type); + TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice"); + + vm_gic_destroy(&v); + + return 0; +} + +void run_tests(u32 gic_dev_type) +{ + pr_info("Test VGICv5 PPIs\n"); + test_vgic_v5_ppis(gic_dev_type); +} + +int main(int ac, char **av) +{ + int ret; + int pa_bits; + + test_disable_default_vgic(); + + pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; + max_phys_size = 1ULL << pa_bits; + + ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V5); + if (ret) { + pr_info("No GICv5 support; Not running GIC_v5 tests.\n"); + exit(KSFT_SKIP); + } + + pr_info("Running VGIC_V5 tests.\n"); + run_tests(KVM_DEV_TYPE_ARM_VGIC_V5); + + return 0; +} diff --git a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c index f16b3b27e32e..22223395969e 100644 --- a/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c @@ -28,31 +28,25 @@ struct vpmu_vm { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - int gic_fd; }; static struct vpmu_vm vpmu_vm; struct pmreg_sets { - uint64_t set_reg_id; - uint64_t clr_reg_id; + u64 set_reg_id; + u64 clr_reg_id; }; #define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} -static uint64_t get_pmcr_n(uint64_t pmcr) +static u64 get_pmcr_n(u64 pmcr) { return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } -static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) +static u64 get_counters_mask(u64 n) { - u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); -} - -static uint64_t get_counters_mask(uint64_t n) -{ - uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); + u64 mask = BIT(ARMV8_PMU_CYCLE_IDX); if (n) mask |= GENMASK(n - 1, 0); @@ -95,7 +89,7 @@ static inline void write_sel_evtyper(int sel, unsigned long val) static void pmu_disable_reset(void) { - uint64_t pmcr = read_sysreg(pmcr_el0); + u64 pmcr = read_sysreg(pmcr_el0); /* Reset all counters, disabling them */ pmcr &= ~ARMV8_PMU_PMCR_E; @@ -175,7 +169,7 @@ struct pmc_accessor pmc_accessors[] = { #define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ { \ - uint64_t _tval = read_sysreg(regname); \ + u64 _tval = read_sysreg(regname); \ \ if (set_expected) \ __GUEST_ASSERT((_tval & mask), \ @@ -191,7 +185,7 @@ struct pmc_accessor pmc_accessors[] = { * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers * are set or cleared as specified in @set_expected. */ -static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) +static void check_bitmap_pmu_regs(u64 mask, bool set_expected) { GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); @@ -213,7 +207,7 @@ static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) */ static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) { - uint64_t pmcr_n, test_bit = BIT(pmc_idx); + u64 pmcr_n, test_bit = BIT(pmc_idx); bool set_expected = false; if (set_op) { @@ -238,7 +232,7 @@ static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) */ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) { - uint64_t write_data, read_data; + u64 write_data, read_data; /* Disable all PMCs and reset all PMCs to zero. */ pmu_disable_reset(); @@ -293,11 +287,11 @@ static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) } #define INVALID_EC (-1ul) -uint64_t expected_ec = INVALID_EC; +u64 expected_ec = INVALID_EC; static void guest_sync_handler(struct ex_regs *regs) { - uint64_t esr, ec; + u64 esr, ec; esr = read_sysreg(esr_el1); ec = ESR_ELx_EC(esr); @@ -357,9 +351,9 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * if reading/writing PMU registers for implemented or unimplemented * counters works as expected. */ -static void guest_code(uint64_t expected_pmcr_n) +static void guest_code(u64 expected_pmcr_n) { - uint64_t pmcr, pmcr_n, unimp_mask; + u64 pmcr, pmcr_n, unimp_mask; int i, pmc; __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, @@ -408,16 +402,12 @@ static void guest_code(uint64_t expected_pmcr_n) static void create_vpmu_vm(void *guest_code) { struct kvm_vcpu_init init; - uint8_t pmuver, ec; - uint64_t dfr0, irq = 23; + u8 pmuver, ec; + u64 dfr0, irq = 23; struct kvm_device_attr irq_attr = { .group = KVM_ARM_VCPU_PMU_V3_CTRL, .attr = KVM_ARM_VCPU_PMU_V3_IRQ, - .addr = (uint64_t)&irq, - }; - struct kvm_device_attr init_attr = { - .group = KVM_ARM_VCPU_PMU_V3_CTRL, - .attr = KVM_ARM_VCPU_PMU_V3_INIT, + .addr = (u64)&irq, }; /* The test creates the vpmu_vm multiple times. Ensure a clean state */ @@ -431,33 +421,29 @@ static void create_vpmu_vm(void *guest_code) } /* Create vCPU with PMUv3 */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); - __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, - "Failed to create vgic-v3, skipping"); + + kvm_arch_vm_finalize_vcpus(vpmu_vm.vm); /* Make sure that PMUv3 support is indicated in the ID register */ dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1)); - pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + pmuver = FIELD_GET(ID_AA64DFR0_EL1_PMUVer, dfr0); TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); - /* Initialize vPMU */ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr); - vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); } static void destroy_vpmu_vm(void) { - close(vpmu_vm.gic_fd); kvm_vm_free(vpmu_vm.vm); } -static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) +static void run_vcpu(struct kvm_vcpu *vcpu, u64 pmcr_n) { struct ucall uc; @@ -475,52 +461,47 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) } } -static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail) { struct kvm_vcpu *vcpu; - uint64_t pmcr, pmcr_orig; + unsigned int prev; + int ret; create_vpmu_vm(guest_code); vcpu = vpmu_vm.vcpu; - pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); - pmcr = pmcr_orig; + prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0))); - /* - * Setting a larger value of PMCR.N should not modify the field, and - * return a success. - */ - set_pmcr_n(&pmcr, pmcr_n); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); - pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); + ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters); if (expect_fail) - TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", - pmcr, pmcr_n); + TEST_ASSERT(ret && errno == EINVAL, + "Setting more PMU counters (%u) than available (%u) unexpectedly succeeded", + nr_counters, prev); else - TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)", - pmcr_n, get_pmcr_n(pmcr)); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); + + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL); } /* * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, * and run the test. */ -static void run_access_test(uint64_t pmcr_n) +static void run_access_test(u64 pmcr_n) { - uint64_t sp; + u64 sp; struct kvm_vcpu *vcpu; struct kvm_vcpu_init init; pr_debug("Test with pmcr_n %lu\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; /* Save the initial sp to restore them later to run the guest again */ - sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1)); + sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1)); run_vcpu(vcpu, pmcr_n); @@ -528,12 +509,12 @@ static void run_access_test(uint64_t pmcr_n) * Reset and re-initialize the vCPU, and run the guest code again to * check if PMCR_EL0.N is preserved. */ - vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + kvm_get_default_vcpu_target(vpmu_vm.vm, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); aarch64_vcpu_setup(vcpu, &init); vcpu_init_descriptor_tables(vcpu); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); run_vcpu(vcpu, pmcr_n); @@ -550,14 +531,14 @@ static struct pmreg_sets validity_check_reg_sets[] = { * Create a VM, and check if KVM handles the userspace accesses of * the PMU register sets in @validity_check_reg_sets[] correctly. */ -static void run_pmregs_validity_test(uint64_t pmcr_n) +static void run_pmregs_validity_test(u64 pmcr_n) { int i; struct kvm_vcpu *vcpu; - uint64_t set_reg_id, clr_reg_id, reg_val; - uint64_t valid_counters_mask, max_counters_mask; + u64 set_reg_id, clr_reg_id, reg_val; + u64 valid_counters_mask, max_counters_mask; - test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + test_create_vpmu_vm_with_nr_counters(pmcr_n, false); vcpu = vpmu_vm.vcpu; valid_counters_mask = get_counters_mask(pmcr_n); @@ -607,11 +588,11 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) * the vCPU to @pmcr_n, which is larger than the host value. * The attempt should fail as @pmcr_n is too big to set for the vCPU. */ -static void run_error_test(uint64_t pmcr_n) +static void run_error_test(u64 pmcr_n) { pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); - test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + test_create_vpmu_vm_with_nr_counters(pmcr_n, true); destroy_vpmu_vm(); } @@ -619,9 +600,9 @@ static void run_error_test(uint64_t pmcr_n) * Return the default number of implemented PMU event counters excluding * the cycle counter (i.e. PMCR_EL0.N value) for the guest. */ -static uint64_t get_pmcr_n_limit(void) +static u64 get_pmcr_n_limit(void) { - uint64_t pmcr; + u64 pmcr; create_vpmu_vm(guest_code); pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)); @@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void) return get_pmcr_n(pmcr); } +static bool kvm_supports_nr_counters_attr(void) +{ + bool supported; + + create_vpmu_vm(NULL); + supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, + KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS); + destroy_vpmu_vm(); + + return supported; +} + int main(void) { - uint64_t i, pmcr_n; + u64 i, pmcr_n; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + TEST_REQUIRE(kvm_supports_vgic_v3()); + TEST_REQUIRE(kvm_supports_nr_counters_attr()); pmcr_n = get_pmcr_n_limit(); for (i = 0; i <= pmcr_n; i++) { diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c index 60cb25454899..df4ed5e3877c 100644 --- a/tools/testing/selftests/kvm/coalesced_io_test.c +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -14,16 +14,16 @@ struct kvm_coalesced_io { struct kvm_coalesced_mmio_ring *ring; - uint32_t ring_size; - uint64_t mmio_gpa; - uint64_t *mmio; + u32 ring_size; + u64 mmio_gpa; + u64 *mmio; /* * x86-only, but define pio_port for all architectures to minimize the * amount of #ifdeffery and complexity, without having to sacrifice * verbose error messages. */ - uint8_t pio_port; + u8 pio_port; }; static struct kvm_coalesced_io kvm_builtin_io_ring; @@ -70,13 +70,13 @@ static void guest_code(struct kvm_coalesced_io *io) static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, struct kvm_coalesced_io *io, - uint32_t ring_start, - uint32_t expected_exit) + u32 ring_start, + u32 expected_exit) { const bool want_pio = expected_exit == KVM_EXIT_IO; struct kvm_coalesced_mmio_ring *ring = io->ring; struct kvm_run *run = vcpu->run; - uint32_t pio_value; + u32 pio_value; WRITE_ONCE(ring->first, ring_start); WRITE_ONCE(ring->last, ring_start); @@ -88,13 +88,13 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, * data_offset is garbage, e.g. an MMIO gpa. */ if (run->exit_reason == KVM_EXIT_IO) - pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + pio_value = *(u32 *)((void *)run + run->io.data_offset); else pio_value = 0; TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && - *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + *(u64 *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && pio_value == io->pio_port + io->ring_size - 1)), @@ -105,14 +105,14 @@ static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", - run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(u64 *)run->mmio.data, run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); } static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, struct kvm_coalesced_io *io, - uint32_t ring_start, - uint32_t expected_exit) + u32 ring_start, + u32 expected_exit) { struct kvm_coalesced_mmio_ring *ring = io->ring; int i; @@ -124,18 +124,18 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, ring->first, ring->last, io->ring_size, ring_start); for (i = 0; i < io->ring_size - 1; i++) { - uint32_t idx = (ring->first + i) % io->ring_size; + u32 idx = (ring->first + i) % io->ring_size; struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; #ifdef __x86_64__ if (i & 1) TEST_ASSERT(entry->phys_addr == io->pio_port && entry->len == 4 && entry->pio && - *(uint32_t *)entry->data == io->pio_port + i, + *(u32 *)entry->data == io->pio_port + i, "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", io->pio_port, io->pio_port + i, i, entry->len, entry->pio ? "PIO" : "MMIO", - entry->phys_addr, *(uint32_t *)entry->data); + entry->phys_addr, *(u32 *)entry->data); else #endif TEST_ASSERT(entry->phys_addr == io->mmio_gpa && @@ -143,12 +143,12 @@ static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", io->mmio_gpa, io->mmio_gpa + i, i, entry->len, entry->pio ? "PIO" : "MMIO", - entry->phys_addr, *(uint64_t *)entry->data); + entry->phys_addr, *(u64 *)entry->data); } } static void test_coalesced_io(struct kvm_vcpu *vcpu, - struct kvm_coalesced_io *io, uint32_t ring_start) + struct kvm_coalesced_io *io, u32 ring_start) { struct kvm_coalesced_mmio_ring *ring = io->ring; @@ -219,11 +219,11 @@ int main(int argc, char *argv[]) * the MMIO GPA identity mapped in the guest. */ .mmio_gpa = 4ull * SZ_1G, - .mmio = (uint64_t *)(4ull * SZ_1G), + .mmio = (u64 *)(4ull * SZ_1G), .pio_port = 0x80, }; - virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + virt_map(vm, (u64)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); sync_global_to_guest(vm, kvm_builtin_io_ring); vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 0202b78f8680..302c4923d093 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -24,7 +24,7 @@ #ifdef __NR_userfaultfd static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static size_t demand_paging_size; static char *guest_data_prototype; @@ -58,7 +58,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); - uint64_t addr = msg->arg.pagefault.address; + u64 addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -68,7 +68,7 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { struct uffdio_copy copy; - copy.src = (uint64_t)guest_data_prototype; + copy.src = (u64)guest_data_prototype; copy.dst = addr; copy.len = demand_paging_size; copy.mode = 0; @@ -138,7 +138,7 @@ struct test_params { bool partition_vcpu_memory_access; }; -static void prefault_mem(void *alias, uint64_t len) +static void prefault_mem(void *alias, u64 len) { size_t p; @@ -154,7 +154,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; - uint64_t uffd_region_size; + u64 uffd_region_size; struct timespec start; struct timespec ts_diff; double vcpu_paging_rate; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index e79817bd0e29..ef779fa91827 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -20,43 +20,11 @@ #include "guest_modes.h" #include "ucall_common.h" -#ifdef __aarch64__ -#include "arm64/vgic.h" - -static int gic_fd; - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ - /* - * The test can still run even if hardware does not support GICv3, as it - * is only an optimization to reduce guest exits. - */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ - if (gic_fd > 0) - close(gic_fd); -} - -#else /* __aarch64__ */ - -static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) -{ -} - -static void arch_cleanup_vm(struct kvm_vm *vm) -{ -} - -#endif - /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static bool run_vcpus_while_disabling_dirty_logging; /* Host variables */ @@ -69,7 +37,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { struct kvm_vcpu *vcpu = vcpu_args->vcpu; int vcpu_idx = vcpu_args->vcpu_idx; - uint64_t pages_count = 0; + u64 pages_count = 0; struct kvm_run *run; struct timespec start; struct timespec ts_diff; @@ -125,11 +93,11 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) struct test_params { unsigned long iterations; - uint64_t phys_offset; + u64 phys_offset; bool partition_vcpu_memory_access; enum vm_mem_backing_src_type backing_src; int slots; - uint32_t write_percent; + u32 write_percent; bool random_access; }; @@ -138,9 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vm *vm; unsigned long **bitmaps; - uint64_t guest_num_pages; - uint64_t host_num_pages; - uint64_t pages_per_slot; + u64 guest_num_pages; + u64 host_num_pages; + u64 pages_per_slot; struct timespec start; struct timespec ts_diff; struct timespec get_dirty_log_total = (struct timespec){0}; @@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, dirty_log_manual_caps); - arch_setup_vm(vm, nr_vcpus); - /* Start the iterations */ iteration = 0; host_quit = false; @@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) } memstress_free_bitmaps(bitmaps, p->slots); - arch_cleanup_vm(vm); memstress_destroy_vm(vm); } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index aacf80f57439..12446a4b6e8d 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -31,15 +31,18 @@ /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 -/* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 - /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ #define TEST_HOST_LOOP_N 32UL /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* + * Ensure the vCPU is able to perform a reasonable number of writes in each + * iteration to provide a lower bound on coverage. + */ +#define TEST_MIN_WRITES_PER_ITERATION 0x100 + /* Dirty bitmaps are always little endian, so we need to swap on big endian */ #if defined(__s390x__) # define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) @@ -71,23 +74,25 @@ * the host. READ/WRITE_ONCE() should also be used with anything * that may change. */ -static uint64_t host_page_size; -static uint64_t guest_page_size; -static uint64_t guest_num_pages; -static uint64_t iteration; +static u64 host_page_size; +static u64 guest_page_size; +static u64 guest_num_pages; +static u64 iteration; +static u64 nr_writes; +static bool vcpu_stop; /* * Guest physical memory offset of the testing memory slot. * This will be set to the topmost valid physical address minus * the test memory size. */ -static uint64_t guest_test_phys_mem; +static u64 guest_test_phys_mem; /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; /* * Continuously write to the first 8 bytes of a random pages within @@ -95,8 +100,10 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) { - uint64_t addr; - int i; + u64 addr; + +#ifdef __s390x__ + u64 i; /* * On s390x, all pages of a 1M segment are initially marked as dirty @@ -106,17 +113,20 @@ static void guest_code(void) */ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; - vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration)); + nr_writes++; } +#endif while (true) { - for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { + while (!READ_ONCE(vcpu_stop)) { addr = guest_test_virt_mem; addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); - vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); + vcpu_arch_put_guest(*(u64 *)addr, READ_ONCE(iteration)); + nr_writes++; } GUEST_SYNC(1); @@ -128,30 +138,23 @@ static bool host_quit; /* Points to the test VM memory region on which we track dirty logs */ static void *host_test_mem; -static uint64_t host_num_pages; +static u64 host_num_pages; /* For statistics only */ -static uint64_t host_dirty_count; -static uint64_t host_clear_count; -static uint64_t host_track_next_count; +static u64 host_dirty_count; +static u64 host_clear_count; /* Whether dirty ring reset is requested, or finished */ static sem_t sem_vcpu_stop; static sem_t sem_vcpu_cont; -/* - * This is only set by main thread, and only cleared by vcpu thread. It is - * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC - * is the only place that we'll guarantee both "dirty bit" and "dirty data" - * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted - * after setting dirty bit but before the data is written. - */ -static atomic_t vcpu_sync_stop_requested; + /* * This is updated by the vcpu thread to tell the host whether it's a * ring-full event. It should only be read until a sem_wait() of * sem_vcpu_stop and before vcpu continues to run. */ static bool dirty_ring_vcpu_ring_full; + /* * This is only used for verifying the dirty pages. Dirty ring has a very * tricky case when the ring just got full, kvm will do userspace exit due to @@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full; * dirty gfn we've collected, so that if a mismatch of data found later in the * verifying process, we let it pass. */ -static uint64_t dirty_ring_last_page; +static u64 dirty_ring_last_page = -1ULL; + +/* + * In addition to the above, it is possible (especially if this + * test is run nested) for the above scenario to repeat multiple times: + * + * The following can happen: + * + * - L1 vCPU: Memory write is logged to PML but not committed. + * + * - L1 test thread: Ignores the write because its last dirty ring entry + * Resets the dirty ring which: + * - Resets the A/D bits in EPT + * - Issues tlb flush (invept), which is intercepted by L0 + * + * - L0: frees the whole nested ept mmu root as the response to invept, + * and thus ensures that when memory write is retried, it will fault again + * + * - L1 vCPU: Same memory write is logged to the PML but not committed again. + * + * - L1 test thread: Ignores the write because its last dirty ring entry (again) + * Resets the dirty ring which: + * - Resets the A/D bits in EPT (again) + * - Issues tlb flush (again) which is intercepted by L0 + * + * ... + * + * N times + * + * - L1 vCPU: Memory write is logged in the PML and then committed. + * Lots of other memory writes are logged and committed. + * ... + * + * - L1 test thread: Sees the memory write along with other memory writes + * in the dirty ring, and since the write is usually not + * the last entry in the dirty-ring and has a very outdated + * iteration, the test fails. + * + * + * Note that this is only possible when the write was the last log entry + * write during iteration N-1, thus remember last iteration last log entry + * and also don't fail when it is reported in the next iteration, together with + * an outdated iteration count. + */ +static u64 dirty_ring_prev_iteration_last_page; enum log_mode_t { /* Only use KVM_GET_DIRTY_LOG for logging */ @@ -189,25 +236,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; -static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; - -static void vcpu_kick(void) -{ - pthread_kill(vcpu_thread, SIG_IPI); -} - -/* - * In our test we do signal tricks, let's use a better version of - * sem_wait to avoid signal interrupts - */ -static void sem_wait_until(sem_t *sem) -{ - int ret; - - do - ret = sem_wait(sem); - while (ret == -1 && errno == EINTR); -} +static u32 test_dirty_ring_count = TEST_DIRTY_RING_COUNT; static bool clear_log_supported(void) { @@ -226,15 +255,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm) } static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *unused) + void *bitmap, u32 num_pages, + u32 *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *unused) + void *bitmap, u32 num_pages, + u32 *unused) { kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); @@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, /* Should only be called after a GUEST_SYNC */ static void vcpu_handle_sync_stop(void) { - if (atomic_read(&vcpu_sync_stop_requested)) { - /* It means main thread is sleeping waiting */ - atomic_set(&vcpu_sync_stop_requested, false); + if (READ_ONCE(vcpu_stop)) { sem_post(&sem_vcpu_stop); - sem_wait_until(&sem_vcpu_cont); + sem_wait(&sem_vcpu_cont); } } -static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), - "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); @@ -273,8 +297,8 @@ static bool dirty_ring_supported(void) static void dirty_ring_create_vm_done(struct kvm_vm *vm) { - uint64_t pages; - uint32_t limit; + u64 pages; + u32 limit; /* * We rely on vcpu exit due to full dirty ring state. Adjust @@ -309,12 +333,12 @@ static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn) smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET); } -static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, - int slot, void *bitmap, - uint32_t num_pages, uint32_t *fetch_index) +static u32 dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, + int slot, void *bitmap, + u32 num_pages, u32 *fetch_index) { struct kvm_dirty_gfn *cur; - uint32_t count = 0; + u32 count = 0; while (true) { cur = &dirty_gfns[*fetch_index % test_dirty_ring_count]; @@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, "%u != %u", cur->slot, slot); TEST_ASSERT(cur->offset < num_pages, "Offset overflow: " "0x%llx >= 0x%x", cur->offset, num_pages); - //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset); __set_bit_le(cur->offset, bitmap); dirty_ring_last_page = cur->offset; dirty_gfn_set_collected(cur); @@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns, return count; } -static void dirty_ring_wait_vcpu(void) -{ - /* This makes sure that hardware PML cache flushed */ - vcpu_kick(); - sem_wait_until(&sem_vcpu_stop); -} - -static void dirty_ring_continue_vcpu(void) -{ - pr_info("Notifying vcpu to continue\n"); - sem_post(&sem_vcpu_cont); -} - static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx) + void *bitmap, u32 num_pages, + u32 *ring_buf_idx) { - uint32_t count = 0, cleared; - bool continued_vcpu = false; - - dirty_ring_wait_vcpu(); - - if (!dirty_ring_vcpu_ring_full) { - /* - * This is not a ring-full event, it's safe to allow - * vcpu to continue - */ - dirty_ring_continue_vcpu(); - continued_vcpu = true; - } + u32 count, cleared; /* Only have one vcpu */ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), @@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); - - if (!continued_vcpu) { - TEST_ASSERT(dirty_ring_vcpu_ring_full, - "Didn't continue vcpu even without ring full"); - dirty_ring_continue_vcpu(); - } - - pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ if (get_ucall(vcpu, NULL) == UCALL_SYNC) { - /* We should allow this to continue */ - ; - } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || - (ret == -1 && err == EINTR)) { - /* Update the flag first before pause */ - WRITE_ONCE(dirty_ring_vcpu_ring_full, - run->exit_reason == KVM_EXIT_DIRTY_RING_FULL); - sem_post(&sem_vcpu_stop); - pr_info("vcpu stops because %s...\n", - dirty_ring_vcpu_ring_full ? - "dirty ring is full" : "vcpu is kicked out"); - sem_wait_until(&sem_vcpu_cont); - pr_info("vcpu continues now.\n"); + vcpu_handle_sync_stop(); + } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) { + WRITE_ONCE(dirty_ring_vcpu_ring_full, true); + vcpu_handle_sync_stop(); } else { TEST_ASSERT(false, "Invalid guest sync status: " "exit_reason=%s", @@ -423,10 +404,10 @@ struct log_mode { void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx); + void *bitmap, u32 num_pages, + u32 *ring_buf_idx); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -449,15 +430,6 @@ struct log_mode { }, }; -/* - * We use this bitmap to track some pages that should have its dirty - * bit set in the _next_ iteration. For example, if we detected the - * page value changed to current iteration but at the same time the - * page bit is cleared in the latest bitmap, then the system must - * report that write in the next get dirty log call. - */ -static unsigned long *host_bmap_track; - static void log_modes_dump(void) { int i; @@ -487,8 +459,8 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) } static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, - void *bitmap, uint32_t num_pages, - uint32_t *ring_buf_idx) + void *bitmap, u32 num_pages, + u32 *ring_buf_idx) { struct log_mode *mode = &log_modes[host_log_mode]; @@ -497,174 +469,113 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx); } -static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vcpu, ret, err); + mode->after_vcpu_run(vcpu); } static void *vcpu_worker(void *data) { - int ret; struct kvm_vcpu *vcpu = data; - uint64_t pages_count = 0; - struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) - + sizeof(sigset_t)); - sigset_t *sigset = (sigset_t *) &sigmask->sigset; - /* - * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the - * ioctl to return with -EINTR, but it is still pending and we need - * to accept it with the sigwait. - */ - sigmask->len = 8; - pthread_sigmask(0, NULL, sigset); - sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); - - sigemptyset(sigset); - sigaddset(sigset, SIG_IPI); + sem_wait(&sem_vcpu_cont); while (!READ_ONCE(host_quit)) { - /* Clear any existing kick signals */ - pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = __vcpu_run(vcpu); - if (ret == -1 && errno == EINTR) { - int sig = -1; - sigwait(sigset, &sig); - assert(sig == SIG_IPI); - } - log_mode_after_vcpu_run(vcpu, ret, errno); + vcpu_run(vcpu); + log_mode_after_vcpu_run(vcpu); } - pr_info("Dirtied %"PRIu64" pages\n", pages_count); - return NULL; } -static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) +static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap) { - uint64_t step = vm_num_host_pages(mode, 1); - uint64_t page; - uint64_t *value_ptr; - uint64_t min_iter = 0; + u64 page, nr_dirty_pages = 0, nr_clean_pages = 0; + u64 step = vm_num_host_pages(mode, 1); for (page = 0; page < host_num_pages; page += step) { - value_ptr = host_test_mem + page * host_page_size; - - /* If this is a special page that we were tracking... */ - if (__test_and_clear_bit_le(page, host_bmap_track)) { - host_track_next_count++; - TEST_ASSERT(test_bit_le(page, bmap), - "Page %"PRIu64" should have its dirty bit " - "set in this iteration but it is missing", - page); - } + u64 val = *(u64 *)(host_test_mem + page * host_page_size); + bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]); - if (__test_and_clear_bit_le(page, bmap)) { - bool matched; - - host_dirty_count++; + /* + * Ensure both bitmaps are cleared, as a page can be written + * multiple times per iteration, i.e. can show up in both + * bitmaps, and the dirty ring is additive, i.e. doesn't purge + * bitmap entries from previous collections. + */ + if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) { + nr_dirty_pages++; /* - * If the bit is set, the value written onto - * the corresponding page should be either the - * previous iteration number or the current one. + * If the page is dirty, the value written to memory + * should be the current iteration number. */ - matched = (*value_ptr == iteration || - *value_ptr == iteration - 1); - - if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) { - if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) { - /* - * Short answer: this case is special - * only for dirty ring test where the - * page is the last page before a kvm - * dirty ring full in iteration N-2. - * - * Long answer: Assuming ring size R, - * one possible condition is: - * - * main thr vcpu thr - * -------- -------- - * iter=1 - * write 1 to page 0~(R-1) - * full, vmexit - * collect 0~(R-1) - * kick vcpu - * write 1 to (R-1)~(2R-2) - * full, vmexit - * iter=2 - * collect (R-1)~(2R-2) - * kick vcpu - * write 1 to (2R-2) - * (NOTE!!! "1" cached in cpu reg) - * write 2 to (2R-1)~(3R-3) - * full, vmexit - * iter=3 - * collect (2R-2)~(3R-3) - * (here if we read value on page - * "2R-2" is 1, while iter=3!!!) - * - * This however can only happen once per iteration. - */ - min_iter = iteration - 1; + if (val == iteration) + continue; + + if (host_log_mode == LOG_MODE_DIRTY_RING) { + /* + * The last page in the ring from previous + * iteration can be written with the value + * from the previous iteration, as the value to + * be written may be cached in a CPU register. + */ + if (page == dirty_ring_prev_iteration_last_page && + val == iteration - 1) continue; - } else if (page == dirty_ring_last_page) { - /* - * Please refer to comments in - * dirty_ring_last_page. - */ + + /* + * Any value from a previous iteration is legal + * for the last entry, as the write may not yet + * have retired, i.e. the page may hold whatever + * it had before this iteration started. + */ + if (page == dirty_ring_last_page && + val < iteration) continue; - } + } else if (!val && iteration == 1 && bmap0_dirty) { + /* + * When testing get+clear, the dirty bitmap + * starts with all bits set, and so the first + * iteration can observe a "dirty" page that + * was never written, but only in the first + * bitmap (collecting the bitmap also clears + * all dirty pages). + */ + continue; } - TEST_ASSERT(matched, - "Set page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); + TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } else { - host_clear_count++; + nr_clean_pages++; /* * If cleared, the value written can be any - * value smaller or equals to the iteration - * number. Note that the value can be exactly - * (iteration-1) if that write can happen - * like this: - * - * (1) increase loop count to "iteration-1" - * (2) write to page P happens (with value - * "iteration-1") - * (3) get dirty log for "iteration-1"; we'll - * see that page P bit is set (dirtied), - * and not set the bit in host_bmap_track - * (4) increase loop count to "iteration" - * (which is current iteration) - * (5) get dirty log for current iteration, - * we'll see that page P is cleared, with - * value "iteration-1". + * value smaller than the iteration number. */ - TEST_ASSERT(*value_ptr <= iteration, - "Clear page %"PRIu64" value %"PRIu64 - " incorrect (iteration=%"PRIu64")", - page, *value_ptr, iteration); - if (*value_ptr == iteration) { - /* - * This page is _just_ modified; it - * should report its dirtyness in the - * next run - */ - __set_bit_le(page, host_bmap_track); - } + TEST_ASSERT(val < iteration, + "Clear page %lu value (%lu) >= iteration (%lu) " + "(last = %lu, prev_last = %lu)", + page, val, iteration, dirty_ring_last_page, + dirty_ring_prev_iteration_last_page); } } + + pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n", + iteration, nr_dirty_pages, nr_clean_pages, nr_writes); + + host_dirty_count += nr_dirty_pages; + host_clear_count += nr_clean_pages; } static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, void *guest_code) + u64 extra_mem_pages, void *guest_code) { struct kvm_vm *vm; @@ -674,13 +585,14 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } struct test_params { unsigned long iterations; unsigned long interval; - uint64_t phys_offset; + u64 phys_offset; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -688,8 +600,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct test_params *p = arg; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - unsigned long *bmap; - uint32_t ring_buf_idx = 0; + unsigned long *bmap[2]; + u32 ring_buf_idx = 0; int sem_val; if (!log_mode_supported()) { @@ -729,14 +641,20 @@ static void run_test(enum vm_guest_mode mode, void *arg) } #ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20); + /* + * The workaround in guest_code() to write all pages prior to the first + * iteration isn't compatible with the dirty ring, as the dirty ring + * support relies on the vCPU to actually stop when vcpu_stop is set so + * that the vCPU doesn't hang waiting for the dirty ring to be emptied. + */ + TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, + "Test needs to be updated to support s390 dirty ring"); #endif pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - bmap = bitmap_zalloc(host_num_pages); - host_bmap_track = bitmap_zalloc(host_num_pages); + bmap[0] = bitmap_zalloc(host_num_pages); + bmap[1] = bitmap_zalloc(host_num_pages); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -749,7 +667,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem); /* Export the shared variables to the guest */ sync_global_to_guest(vm, host_page_size); @@ -757,14 +675,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); - /* Start the iterations */ - iteration = 1; - sync_global_to_guest(vm, iteration); - WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; - host_track_next_count = 0; - WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + WRITE_ONCE(host_quit, false); /* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. @@ -776,21 +689,95 @@ static void run_test(enum vm_guest_mode mode, void *arg) sem_getvalue(&sem_vcpu_cont, &sem_val); TEST_ASSERT_EQ(sem_val, 0); + TEST_ASSERT_EQ(vcpu_stop, false); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); - while (iteration < p->iterations) { - /* Give the vcpu thread some time to dirty some pages */ - usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, - bmap, host_num_pages, - &ring_buf_idx); + for (iteration = 1; iteration <= p->iterations; iteration++) { + unsigned long i; + + sync_global_to_guest(vm, iteration); + + WRITE_ONCE(nr_writes, 0); + sync_global_to_guest(vm, nr_writes); + + dirty_ring_prev_iteration_last_page = dirty_ring_last_page; + WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + + sem_post(&sem_vcpu_cont); /* - * See vcpu_sync_stop_requested definition for details on why - * we need to stop vcpu when verify data. + * Let the vCPU run beyond the configured interval until it has + * performed the minimum number of writes. This verifies the + * guest is making forward progress, e.g. isn't stuck because + * of a KVM bug, and puts a firm floor on test coverage. */ - atomic_set(&vcpu_sync_stop_requested, true); - sem_wait_until(&sem_vcpu_stop); + for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { + /* + * Sleep in 1ms chunks to keep the interval math simple + * and so that the test doesn't run too far beyond the + * specified interval. + */ + usleep(1000); + + sync_global_from_guest(vm, nr_writes); + + /* + * Reap dirty pages while the guest is running so that + * dirty ring full events are resolved, i.e. so that a + * larger interval doesn't always end up with a vCPU + * that's effectively blocked. Collecting while the + * guest is running also verifies KVM doesn't lose any + * state. + * + * For bitmap modes, KVM overwrites the entire bitmap, + * i.e. collecting the bitmaps is destructive. Collect + * the bitmap only on the first pass, otherwise this + * test would lose track of dirty pages. + */ + if (i && host_log_mode != LOG_MODE_DIRTY_RING) + continue; + + /* + * For the dirty ring, empty the ring on subsequent + * passes only if the ring was filled at least once, + * to verify KVM's handling of a full ring (emptying + * the ring on every pass would make it unlikely the + * vCPU would ever fill the fing). + */ + if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) + continue; + + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[0], host_num_pages, + &ring_buf_idx); + } + + /* + * Stop the vCPU prior to collecting and verifying the dirty + * log. If the vCPU is allowed to run during collection, then + * pages that are written during this iteration may be missed, + * i.e. collected in the next iteration. And if the vCPU is + * writing memory during verification, pages that this thread + * sees as clean may be written with this iteration's value. + */ + WRITE_ONCE(vcpu_stop, true); + sync_global_to_guest(vm, vcpu_stop); + sem_wait(&sem_vcpu_stop); + + /* + * Clear vcpu_stop after the vCPU thread has acknowledge the + * stop request and is waiting, i.e. is definitely not running! + */ + WRITE_ONCE(vcpu_stop, false); + sync_global_to_guest(vm, vcpu_stop); + + /* + * Sync the number of writes performed before verification, the + * info will be printed along with the dirty/clean page counts. + */ + sync_global_from_guest(vm, nr_writes); + /* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; @@ -798,32 +785,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) * the flush of the last page, and since we handle the last * page specially verification will succeed anyway. */ - assert(host_log_mode == LOG_MODE_DIRTY_RING || - atomic_read(&vcpu_sync_stop_requested) == false); + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, + bmap[1], host_num_pages, + &ring_buf_idx); vm_dirty_log_verify(mode, bmap); - - /* - * Set host_quit before sem_vcpu_cont in the final iteration to - * ensure that the vCPU worker doesn't resume the guest. As - * above, the dirty ring test may stop and wait even when not - * explicitly request to do so, i.e. would hang waiting for a - * "continue" if it's allowed to resume the guest. - */ - if (++iteration == p->iterations) - WRITE_ONCE(host_quit, true); - - sem_post(&sem_vcpu_cont); - sync_global_to_guest(vm, iteration); } + WRITE_ONCE(host_quit, true); + sem_post(&sem_vcpu_cont); + pthread_join(vcpu_thread, NULL); - pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " - "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count, - host_track_next_count); + pr_info("Total bits checked: dirty (%lu), clear (%lu)\n", + host_dirty_count, host_clear_count); - free(bmap); - free(host_bmap_track); + free(bmap[0]); + free(bmap[1]); kvm_vm_free(vm); } @@ -857,7 +834,6 @@ int main(int argc, char *argv[]) .interval = TEST_HOST_LOOP_INTERVAL, }; int opt, i; - sigset_t sigset; sem_init(&sem_vcpu_stop, 0, 0); sem_init(&sem_vcpu_cont, 0, 0); @@ -908,19 +884,12 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero"); TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", p.iterations, p.interval); - srandom(time(0)); - - /* Ensure that vCPU threads start with SIG_IPI blocked. */ - sigemptyset(&sigset); - sigaddset(&sigset, SIG_IPI); - pthread_sigmask(SIG_BLOCK, &sigset, NULL); - if (host_log_mode_option == LOG_MODE_ALL) { /* Run each log mode */ for (i = 0; i < LOG_MODE_NUM; i++) { diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 91f05f78e824..216f10644c1a 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) } #ifdef __aarch64__ -static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c, + struct kvm_vcpu_init *init) { struct vcpu_reg_sublist *s; + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init); + for_each_sublist(c, s) if (s->capability) init->features[s->feature / 32] |= 1 << (s->feature % 32); @@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) { - struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu_init init; struct kvm_vcpu *vcpu; - prepare_vcpu_init(c, &init); + prepare_vcpu_init(vm, c, &init); vcpu = __vm_vcpu_add(vm, 0); aarch64_vcpu_setup(vcpu, &init); @@ -213,7 +216,7 @@ static void run_test(struct vcpu_reg_list *c) * since we don't know the capabilities of any new registers. */ for_each_present_blessed_reg(i) { - uint8_t addr[2048 / 8]; + u8 addr[2048 / 8]; struct kvm_one_reg reg = { .id = reg_list->reg[i], .addr = (__u64)&addr, diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ce687f8d248f..832ef4dfb99f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -13,14 +13,19 @@ #include <linux/bitmap.h> #include <linux/falloc.h> -#include <sys/mman.h> +#include <linux/sizes.h> #include <sys/types.h> #include <sys/stat.h> +#include "kvm_syscalls.h" #include "kvm_util.h" +#include "numaif.h" #include "test_util.h" +#include "ucall_common.h" -static void test_file_read_write(int fd) +static size_t page_size; + +static void test_file_read_write(int fd, size_t total_size) { char buf[64]; @@ -34,15 +39,235 @@ static void test_file_read_write(int fd) "pwrite on a guest_mem fd should fail"); } -static void test_mmap(int fd, size_t page_size) +static void test_mmap_cow(int fd, size_t size) +{ + void *mem; + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd."); +} + +static void test_mmap_supported(int fd, size_t total_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + int ret; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + memset(mem, val, total_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, + page_size); + TEST_ASSERT(!ret, "fallocate the first page should succeed."); + + for (i = 0; i < page_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00); + for (; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + memset(mem, val, page_size); + for (i = 0; i < total_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, total_size); +} + +static void test_mbind(int fd, size_t total_size) +{ + const unsigned long nodemask_0 = 1; /* nid: 0 */ + unsigned long nodemask = 0; + unsigned long maxnode = BITS_PER_TYPE(nodemask); + int policy; + char *mem; + int ret; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + /* Test MPOL_INTERLEAVE policy */ + kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0, + "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_INTERLEAVE, nodemask_0, policy, nodemask); + + /* Test basic MPOL_BIND policy */ + kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0, + "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx", + MPOL_BIND, nodemask_0, policy, nodemask); + + /* Test MPOL_DEFAULT policy */ + kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0); + kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR); + TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask, + "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx", + MPOL_DEFAULT, policy, nodemask); + + /* Test with invalid policy */ + ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "mbind with invalid policy should fail with EINVAL"); + + kvm_munmap(mem, total_size); +} + +static void test_numa_allocation(int fd, size_t total_size) +{ + unsigned long node0_mask = 1; /* Node 0 */ + unsigned long node1_mask = 2; /* Node 1 */ + unsigned long maxnode = 8; + void *pages[4]; + int status[4]; + char *mem; + int i; + + if (!is_multi_numa_node_system()) + return; + + mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + for (i = 0; i < 4; i++) + pages[i] = (char *)mem + page_size * i; + + /* Set NUMA policy after allocation */ + memset(mem, 0xaa, page_size); + kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size); + + /* Set NUMA policy before allocation */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + /* Validate if pages are allocated on specified NUMA nodes */ + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]); + + /* Punch hole for all pages */ + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size); + + /* Change NUMA policy nodes and reallocate */ + kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0); + kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0); + memset(mem, 0xaa, total_size); + + kvm_move_pages(0, 4, pages, NULL, status, 0); + TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]); + TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]); + TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]); + TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]); + + kvm_munmap(mem, total_size); +} + +static void test_collapse(int fd, u64 flags) +{ + const size_t pmd_size = get_trans_hugepagesz(); + void *reserved_addr; + void *aligned_addr; + char *mem; + off_t i; + + /* + * To even reach the point where the guest_memfd folios will + * get collapsed, both the userspace address and the offset + * within the guest_memfd have to be aligned to pmd_size. + * + * To achieve that alignment, reserve virtual address space + * with regular mmap, then use MAP_FIXED to allocate memory + * from a pmd_size-aligned offset (0) at a known, available + * virtual address. + */ + reserved_addr = kvm_mmap(pmd_size * 2, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); + aligned_addr = align_ptr_up(reserved_addr, pmd_size); + + mem = mmap(aligned_addr, pmd_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, 0); + TEST_ASSERT(IS_ALIGNED((u64)mem, pmd_size), + "Userspace address must be aligned to PMD size."); + + /* + * Use reads to populate page table to avoid setting dirty + * flag on page. + */ + for (i = 0; i < pmd_size; i += getpagesize()) + READ_ONCE(mem[i]); + + /* + * Advising the use of huge pages in guest_memfd should be + * fine... + */ + kvm_madvise(mem, pmd_size, MADV_HUGEPAGE); + + /* + * ... but collapsing folios must not be supported to avoid + * mapping beyond shared ranges into host userspace page + * tables. + */ + TEST_ASSERT_EQ(madvise(mem, pmd_size, MADV_COLLAPSE), -1); + TEST_ASSERT_EQ(errno, EINVAL); + + /* + * Removing from host page tables and re-faulting should be + * fine; should not end up faulting in a collapsed/huge folio. + */ + kvm_madvise(mem, pmd_size, MADV_DONTNEED); + READ_ONCE(mem[0]); + + kvm_munmap(reserved_addr, pmd_size * 2); +} + +static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size) +{ + const char val = 0xaa; + char *mem; + size_t i; + + mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + + TEST_EXPECT_SIGBUS(memset(mem, val, map_size)); + TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size])); + + for (i = 0; i < accessible_size; i++) + TEST_ASSERT_EQ(READ_ONCE(mem[i]), val); + + kvm_munmap(mem, map_size); +} + +static void test_fault_overflow(int fd, size_t total_size) +{ + test_fault_sigbus(fd, total_size, total_size * 4); +} + +static void test_fault_private(int fd, size_t total_size) +{ + test_fault_sigbus(fd, 0, total_size); +} + +static void test_mmap_not_supported(int fd, size_t total_size) { char *mem; mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); TEST_ASSERT_EQ(mem, MAP_FAILED); + + mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); } -static void test_file_size(int fd, size_t page_size, size_t total_size) +static void test_file_size(int fd, size_t total_size) { struct stat sb; int ret; @@ -53,7 +278,7 @@ static void test_file_size(int fd, size_t page_size, size_t total_size) TEST_ASSERT_EQ(sb.st_blksize, page_size); } -static void test_fallocate(int fd, size_t page_size, size_t total_size) +static void test_fallocate(int fd, size_t total_size) { int ret; @@ -90,7 +315,7 @@ static void test_fallocate(int fd, size_t page_size, size_t total_size) TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); } -static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) +static void test_invalid_punch_hole(int fd, size_t total_size) { struct { off_t offset; @@ -120,26 +345,18 @@ static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size) } } -static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm, + u64 guest_memfd_flags) { - size_t page_size = getpagesize(); - uint64_t flag; size_t size; int fd; for (size = 1; size < page_size; size++) { - fd = __vm_create_guest_memfd(vm, size, 0); - TEST_ASSERT(fd == -1 && errno == EINVAL, + fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags); + TEST_ASSERT(fd < 0 && errno == EINVAL, "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", size); } - - for (flag = BIT(0); flag; flag <<= 1) { - fd = __vm_create_guest_memfd(vm, page_size, flag); - TEST_ASSERT(fd == -1 && errno == EINVAL, - "guest_memfd() with flag '0x%lx' should fail with EINVAL", - flag); - } } static void test_create_guest_memfd_multiple(struct kvm_vm *vm) @@ -147,53 +364,198 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm) int fd1, fd2, ret; struct stat st1, st2; - fd1 = __vm_create_guest_memfd(vm, 4096, 0); + fd1 = __vm_create_guest_memfd(vm, page_size, 0); TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); + TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size"); - fd2 = __vm_create_guest_memfd(vm, 8192, 0); + fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0); TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); ret = fstat(fd2, &st2); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size"); ret = fstat(fd1, &st1); TEST_ASSERT(ret != -1, "memfd fstat should succeed"); - TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size"); TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); close(fd2); close(fd1); } -int main(int argc, char *argv[]) +static void test_guest_memfd_flags(struct kvm_vm *vm) { - size_t page_size; - size_t total_size; + u64 valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + u64 flag; int fd; - struct kvm_vm *vm; - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + for (flag = BIT(0); flag; flag <<= 1) { + fd = __vm_create_guest_memfd(vm, page_size, flag); + if (flag & valid_flags) { + TEST_ASSERT(fd >= 0, + "guest_memfd() with flag '0x%lx' should succeed", + flag); + close(fd); + } else { + TEST_ASSERT(fd < 0 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } + } +} - page_size = getpagesize(); - total_size = page_size * 4; +#define __gmem_test(__test, __vm, __flags, __gmem_size) \ +do { \ + int fd = vm_create_guest_memfd(__vm, __gmem_size, __flags); \ + \ + test_##__test(fd, __gmem_size); \ + close(fd); \ +} while (0) - vm = vm_create_barebones(); +#define gmem_test(__test, __vm, __flags) \ + __gmem_test(__test, __vm, __flags, page_size * 4) - test_create_guest_memfd_invalid(vm); +static void __test_guest_memfd(struct kvm_vm *vm, u64 flags) +{ test_create_guest_memfd_multiple(vm); + test_create_guest_memfd_invalid_sizes(vm, flags); + + gmem_test(file_read_write, vm, flags); + + if (flags & GUEST_MEMFD_FLAG_MMAP) { + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) { + size_t pmd_size = get_trans_hugepagesz(); + + gmem_test(mmap_supported, vm, flags); + gmem_test(fault_overflow, vm, flags); + gmem_test(numa_allocation, vm, flags); + __gmem_test(collapse, vm, flags, pmd_size); + } else { + gmem_test(fault_private, vm, flags); + } + + gmem_test(mmap_cow, vm, flags); + gmem_test(mbind, vm, flags); + } else { + gmem_test(mmap_not_supported, vm, flags); + } + + gmem_test(file_size, vm, flags); + gmem_test(fallocate, vm, flags); + gmem_test(invalid_punch_hole, vm, flags); +} + +static void test_guest_memfd(unsigned long vm_type) +{ + struct kvm_vm *vm = vm_create_barebones_type(vm_type); + u64 flags; - fd = vm_create_guest_memfd(vm, total_size, 0); + test_guest_memfd_flags(vm); - test_file_read_write(fd); - test_mmap(fd, page_size); - test_file_size(fd, page_size, total_size); - test_fallocate(fd, page_size, total_size); - test_invalid_punch_hole(fd, page_size, total_size); + __test_guest_memfd(vm, 0); + + flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS); + if (flags & GUEST_MEMFD_FLAG_MMAP) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP); + + /* MMAP should always be supported if INIT_SHARED is supported. */ + if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) + __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + + kvm_vm_free(vm); +} + +static void guest_code(u8 *mem, u64 size) +{ + size_t i; + + for (i = 0; i < size; i++) + __GUEST_ASSERT(mem[i] == 0xaa, + "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]); + + memset(mem, 0xff, size); + GUEST_DONE(); +} + +static void test_guest_memfd_guest(void) +{ + /* + * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back + * the guest's code, stack, and page tables, and low memory contains + * the PCI hole and other MMIO regions that need to be avoided. + */ + const gpa_t gpa = SZ_4G; + const int slot = 1; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u8 *mem; + size_t size; + int fd, i; + + if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS)) + return; + + vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code); + + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP, + "Default VM type should support MMAP, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED, + "Default VM type should support INIT_SHARED, supported flags = 0x%x", + vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS)); + + /* + * Use the max of the host or guest page size for all operations, as + * KVM requires guest_memfd files and memslots to be sized to multiples + * of the host page size. + */ + size = max_t(size_t, vm->page_size, page_size); + fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP | + GUEST_MEMFD_FLAG_INIT_SHARED); + vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + memset(mem, 0xaa, size); + kvm_munmap(mem, size); + + virt_map(vm, gpa, gpa, size / vm->page_size); + vcpu_args_set(vcpu, 2, gpa, size); + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); + for (i = 0; i < size; i++) + TEST_ASSERT_EQ(mem[i], 0xff); close(fd); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + unsigned long vm_types, vm_type; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + page_size = getpagesize(); + + /* + * Not all architectures support KVM_CAP_VM_TYPES. However, those that + * support guest_memfd have that support for the default VM type. + */ + vm_types = kvm_check_cap(KVM_CAP_VM_TYPES); + if (!vm_types) + vm_types = BIT(VM_TYPE_DEFAULT); + + for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types)) + test_guest_memfd(vm_type); + + test_guest_memfd_guest(); } diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index bcf582852db9..79d3fc326e91 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -16,22 +16,22 @@ #include "ucall_common.h" struct guest_vals { - uint64_t a; - uint64_t b; - uint64_t type; + u64 a; + u64 b; + u64 type; }; static struct guest_vals vals; /* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ #define TYPE_LIST \ -TYPE(test_type_i64, I64, "%ld", int64_t) \ -TYPE(test_type_u64, U64u, "%lu", uint64_t) \ -TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \ -TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \ -TYPE(test_type_u32, U32u, "%u", uint32_t) \ -TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ -TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ +TYPE(test_type_i64, I64, "%ld", s64) \ +TYPE(test_type_u64, U64u, "%lu", u64) \ +TYPE(test_type_x64, U64x, "0x%lx", u64) \ +TYPE(test_type_X64, U64X, "0x%lX", u64) \ +TYPE(test_type_u32, U32u, "%u", u32) \ +TYPE(test_type_x32, U32x, "0x%x", u32) \ +TYPE(test_type_X32, U32X, "0x%X", u32) \ TYPE(test_type_int, INT, "%d", int) \ TYPE(test_type_char, CHAR, "%c", char) \ TYPE(test_type_str, STR, "'%s'", const char *) \ @@ -56,7 +56,7 @@ static void fn(struct kvm_vcpu *vcpu, T a, T b) \ \ snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \ snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \ - vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \ + vals = (struct guest_vals){ (u64)a, (u64)b, TYPE_##ext }; \ sync_global_to_guest(vcpu->vm, vals); \ run_test(vcpu, expected_printf, expected_assert); \ } diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index 94bd6ed24cf3..3147f5c97e94 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -80,7 +80,7 @@ static inline void check_join(pthread_t thread, void **retval) TEST_ASSERT(r == 0, "%s: failed to join thread", __func__); } -static void run_test(uint32_t run) +static void run_test(u32 run) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -88,7 +88,7 @@ static void run_test(uint32_t run) pthread_t threads[VCPU_NUM]; pthread_t throw_away; void *b; - uint32_t i, j; + u32 i, j; CPU_ZERO(&cpu_set); for (i = 0; i < VCPU_NUM; i++) @@ -149,7 +149,7 @@ void wait_for_child_setup(pid_t pid) int main(int argc, char **argv) { - uint32_t i; + u32 i; int s, r; pid_t pid; diff --git a/tools/testing/selftests/kvm/include/arm64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h index bf461de34785..a5836d4ab7ee 100644 --- a/tools/testing/selftests/kvm/include/arm64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h @@ -18,20 +18,20 @@ enum arch_timer { #define CTL_ISTATUS (1 << 2) #define msec_to_cycles(msec) \ - (timer_get_cntfrq() * (uint64_t)(msec) / 1000) + (timer_get_cntfrq() * (u64)(msec) / 1000) #define usec_to_cycles(usec) \ - (timer_get_cntfrq() * (uint64_t)(usec) / 1000000) + (timer_get_cntfrq() * (u64)(usec) / 1000000) #define cycles_to_usec(cycles) \ - ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq()) + ((u64)(cycles) * 1000000 / timer_get_cntfrq()) -static inline uint32_t timer_get_cntfrq(void) +static inline u32 timer_get_cntfrq(void) { return read_sysreg(cntfrq_el0); } -static inline uint64_t timer_get_cntct(enum arch_timer timer) +static inline u64 timer_get_cntct(enum arch_timer timer) { isb(); @@ -48,7 +48,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer) return 0; } -static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) +static inline void timer_set_cval(enum arch_timer timer, u64 cval) { switch (timer) { case VIRTUAL: @@ -64,7 +64,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) isb(); } -static inline uint64_t timer_get_cval(enum arch_timer timer) +static inline u64 timer_get_cval(enum arch_timer timer) { switch (timer) { case VIRTUAL: @@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, int32_t tval) +static inline void timer_set_tval(enum arch_timer timer, s32 tval) { switch (timer) { case VIRTUAL: @@ -95,7 +95,7 @@ static inline void timer_set_tval(enum arch_timer timer, int32_t tval) isb(); } -static inline int32_t timer_get_tval(enum arch_timer timer) +static inline s32 timer_get_tval(enum arch_timer timer) { isb(); switch (timer) { @@ -111,7 +111,7 @@ static inline int32_t timer_get_tval(enum arch_timer timer) return 0; } -static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) +static inline void timer_set_ctl(enum arch_timer timer, u32 ctl) { switch (timer) { case VIRTUAL: @@ -127,7 +127,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) isb(); } -static inline uint32_t timer_get_ctl(enum arch_timer timer) +static inline u32 timer_get_ctl(enum arch_timer timer) { switch (timer) { case VIRTUAL: @@ -142,17 +142,41 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) return 0; } -static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec) +static inline void timer_set_next_cval_ms(enum arch_timer timer, u32 msec) { - uint64_t now_ct = timer_get_cntct(timer); - uint64_t next_ct = now_ct + msec_to_cycles(msec); + u64 now_ct = timer_get_cntct(timer); + u64 next_ct = now_ct + msec_to_cycles(msec); timer_set_cval(timer, next_ct); } -static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec) +static inline void timer_set_next_tval_ms(enum arch_timer timer, u32 msec) { timer_set_tval(timer, msec_to_cycles(msec)); } +static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER : + KVM_ARM_VCPU_TIMER_IRQ_VTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + +static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu) +{ + u32 intid; + u64 attr; + + attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER : + KVM_ARM_VCPU_TIMER_IRQ_PTIMER; + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid); + + return intid; +} + #endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h index 329e4f5079ea..6a5d4634af2c 100644 --- a/tools/testing/selftests/kvm/include/arm64/delay.h +++ b/tools/testing/selftests/kvm/include/arm64/delay.h @@ -8,10 +8,10 @@ #include "arch_timer.h" -static inline void __delay(uint64_t cycles) +static inline void __delay(u64 cycles) { enum arch_timer timer = VIRTUAL; - uint64_t start = timer_get_cntct(timer); + u64 start = timer_get_cntct(timer); while ((timer_get_cntct(timer) - start) < cycles) cpu_relax(); diff --git a/tools/testing/selftests/kvm/include/arm64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h index baeb3c859389..615745093c98 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic.h +++ b/tools/testing/selftests/kvm/include/arm64/gic.h @@ -48,8 +48,8 @@ void gic_set_dir(unsigned int intid); * split is true, EOI drops the priority and deactivates the interrupt. */ void gic_set_eoi_split(bool split); -void gic_set_priority_mask(uint64_t mask); -void gic_set_priority(uint32_t intid, uint32_t prio); +void gic_set_priority_mask(u64 mask); +void gic_set_priority(u32 intid, u32 prio); void gic_irq_set_active(unsigned int intid); void gic_irq_clear_active(unsigned int intid); bool gic_irq_get_active(unsigned int intid); @@ -57,8 +57,9 @@ void gic_irq_set_pending(unsigned int intid); void gic_irq_clear_pending(unsigned int intid); bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); +void gic_irq_set_group(unsigned int intid, bool group); -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table); +void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size, + gpa_t pend_table); #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h index 3722ed9c8f96..a43a407e2d5c 100644 --- a/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h +++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h @@ -5,15 +5,15 @@ #include <linux/sizes.h> -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size); +void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl, + size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size); -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base, size_t itt_size, bool valid); void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, u32 collection_id, u32 intid); void its_send_invall_cmd(void *cmdq_base, u32 collection_id); +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id); #endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/arm64/gic_v5.h b/tools/testing/selftests/kvm/include/arm64/gic_v5.h new file mode 100644 index 000000000000..eb523d9277cf --- /dev/null +++ b/tools/testing/selftests/kvm/include/arm64/gic_v5.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SELFTESTS_GIC_V5_H +#define __SELFTESTS_GIC_V5_H + +#include <asm/barrier.h> +#include <asm/sysreg.h> + +#include <linux/bitfield.h> + +#include "processor.h" + +/* + * Definitions for GICv5 instructions for the Current Domain + */ +#define GICV5_OP_GIC_CDAFF sys_insn(1, 0, 12, 1, 3) +#define GICV5_OP_GIC_CDDI sys_insn(1, 0, 12, 2, 0) +#define GICV5_OP_GIC_CDDIS sys_insn(1, 0, 12, 1, 0) +#define GICV5_OP_GIC_CDHM sys_insn(1, 0, 12, 2, 1) +#define GICV5_OP_GIC_CDEN sys_insn(1, 0, 12, 1, 1) +#define GICV5_OP_GIC_CDEOI sys_insn(1, 0, 12, 1, 7) +#define GICV5_OP_GIC_CDPEND sys_insn(1, 0, 12, 1, 4) +#define GICV5_OP_GIC_CDPRI sys_insn(1, 0, 12, 1, 2) +#define GICV5_OP_GIC_CDRCFG sys_insn(1, 0, 12, 1, 5) +#define GICV5_OP_GICR_CDIA sys_insn(1, 0, 12, 3, 0) +#define GICV5_OP_GICR_CDNMIA sys_insn(1, 0, 12, 3, 1) + +/* Definitions for GIC CDAFF */ +#define GICV5_GIC_CDAFF_IAFFID_MASK GENMASK_ULL(47, 32) +#define GICV5_GIC_CDAFF_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDAFF_IRM_MASK BIT_ULL(28) +#define GICV5_GIC_CDAFF_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDI */ +#define GICV5_GIC_CDDI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDDIS */ +#define GICV5_GIC_CDDIS_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDDIS_TYPE(r) FIELD_GET(GICV5_GIC_CDDIS_TYPE_MASK, r) +#define GICV5_GIC_CDDIS_ID_MASK GENMASK_ULL(23, 0) +#define GICV5_GIC_CDDIS_ID(r) FIELD_GET(GICV5_GIC_CDDIS_ID_MASK, r) + +/* Definitions for GIC CDEN */ +#define GICV5_GIC_CDEN_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDEN_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDHM */ +#define GICV5_GIC_CDHM_HM_MASK BIT_ULL(32) +#define GICV5_GIC_CDHM_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDHM_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPEND */ +#define GICV5_GIC_CDPEND_PENDING_MASK BIT_ULL(32) +#define GICV5_GIC_CDPEND_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPEND_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDPRI */ +#define GICV5_GIC_CDPRI_PRIORITY_MASK GENMASK_ULL(39, 35) +#define GICV5_GIC_CDPRI_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDPRI_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GIC CDRCFG */ +#define GICV5_GIC_CDRCFG_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GIC_CDRCFG_ID_MASK GENMASK_ULL(23, 0) + +/* Definitions for GICR CDIA */ +#define GICV5_GICR_CDIA_VALID_MASK BIT_ULL(32) +#define GICV5_GICR_CDIA_VALID(r) FIELD_GET(GICV5_GICR_CDIA_VALID_MASK, r) +#define GICV5_GICR_CDIA_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GICR_CDIA_ID_MASK GENMASK_ULL(23, 0) +#define GICV5_GICR_CDIA_INTID GENMASK_ULL(31, 0) + +/* Definitions for GICR CDNMIA */ +#define GICV5_GICR_CDNMIA_VALID_MASK BIT_ULL(32) +#define GICV5_GICR_CDNMIA_VALID(r) FIELD_GET(GICV5_GICR_CDNMIA_VALID_MASK, r) +#define GICV5_GICR_CDNMIA_TYPE_MASK GENMASK_ULL(31, 29) +#define GICV5_GICR_CDNMIA_ID_MASK GENMASK_ULL(23, 0) + +#define gicr_insn(insn) read_sysreg_s(GICV5_OP_GICR_##insn) +#define gic_insn(v, insn) write_sysreg_s(v, GICV5_OP_GIC_##insn) + +#define __GIC_BARRIER_INSN(op0, op1, CRn, CRm, op2, Rt) \ + __emit_inst(0xd5000000 | \ + sys_insn((op0), (op1), (CRn), (CRm), (op2)) | \ + ((Rt) & 0x1f)) + +#define GSB_SYS_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 0, 31) +#define GSB_ACK_BARRIER_INSN __GIC_BARRIER_INSN(1, 0, 12, 0, 1, 31) + +#define gsb_ack() asm volatile(GSB_ACK_BARRIER_INSN : : : "memory") +#define gsb_sys() asm volatile(GSB_SYS_BARRIER_INSN : : : "memory") + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +#define GICV5_IRQ_DEFAULT_PRI 0b10000 + +#define GICV5_ARCH_PPI_SW_PPI 0x3 + +void gicv5_ppi_priority_init(void) +{ + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR0_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR1_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR2_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR3_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR4_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR5_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR6_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR7_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR8_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR9_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR10_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR11_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR12_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR13_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR14_EL1); + write_sysreg_s(REPEAT_BYTE(GICV5_IRQ_DEFAULT_PRI), SYS_ICC_PPI_PRIORITYR15_EL1); + + /* + * Context syncronization required to make sure system register writes + * effects are synchronised. + */ + isb(); +} + +void gicv5_cpu_disable_interrupts(void) +{ + u64 cr0; + + cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 0); + write_sysreg_s(cr0, SYS_ICC_CR0_EL1); +} + +void gicv5_cpu_enable_interrupts(void) +{ + u64 cr0, pcr; + + write_sysreg_s(0, SYS_ICC_PPI_ENABLER0_EL1); + write_sysreg_s(0, SYS_ICC_PPI_ENABLER1_EL1); + + gicv5_ppi_priority_init(); + + pcr = FIELD_PREP(ICC_PCR_EL1_PRIORITY, GICV5_IRQ_DEFAULT_PRI); + write_sysreg_s(pcr, SYS_ICC_PCR_EL1); + + cr0 = FIELD_PREP(ICC_CR0_EL1_EN, 1); + write_sysreg_s(cr0, SYS_ICC_CR0_EL1); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h index e43a57d99b56..4a2033708227 100644 --- a/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h @@ -2,6 +2,11 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H -struct kvm_vm_arch {}; +struct kvm_mmu_arch {}; + +struct kvm_vm_arch { + bool has_gic; + int gic_fd; +}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..b8a902ba8573 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,8 +62,73 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_EPD1_SHIFT 23 +#define TCR_EPD1_MASK (UL(1) << TCR_EPD1_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_TBI1 (UL(1) << 38) +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init, void *guest_code); struct ex_regs { @@ -102,14 +167,8 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k); +void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k, + u32 *ipa16k, u32 *ipa64k); void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); @@ -120,7 +179,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); +u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level); +u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva); static inline void cpu_relax(void) { @@ -199,6 +259,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -217,9 +287,9 @@ struct arm_smccc_res { * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); +void smccc_hvc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res); /** * smccc_smc - Invoke a SMCCC function using the smc conduit @@ -228,11 +298,94 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, * @res: pointer to write the return values from registers x0-x3 * */ -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res); +void smccc_smc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res); /* Execute a Wait For Interrupt instruction. */ void wfi(void); +void test_wants_mte(void); +void test_disable_default_vgic(void); + +bool vm_supports_el2(struct kvm_vm *vm); + +static inline bool test_supports_el2(void) +{ + struct kvm_vm *vm = vm_create(1); + bool supported = vm_supports_el2(vm); + + kvm_vm_free(vm); + return supported; +} + +static inline bool vcpu_has_el2(struct kvm_vcpu *vcpu) +{ + return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2); +} + +#define MAPPED_EL2_SYSREG(el2, el1) \ + case SYS_##el1: \ + if (vcpu_has_el2(vcpu)) \ + alias = SYS_##el2; \ + break + + +static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding) +{ + u32 alias = encoding; + + BUILD_BUG_ON(!__builtin_constant_p(encoding)); + + switch (encoding) { + MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1); + MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1); + MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1); + MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1); + MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1); + MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1); + MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1); + MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1); + MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1); + MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1); + MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1); + MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1); + MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1); + MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1); + MAPPED_EL2_SYSREG(POR_EL2, POR_EL1); + MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1); + MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1); + MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1); + MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1); + MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1); + MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1); + MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1); + case SYS_SP_EL1: + if (!vcpu_has_el2(vcpu)) + return ARM64_CORE_REG(sp_el1); + + alias = SYS_SP_EL2; + break; + default: + BUILD_BUG(); + } + + return KVM_ARM64_SYS_REG(alias); +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init); + +static inline unsigned int get_current_el(void) +{ + return (read_sysreg(CurrentEL) >> 2) & 0x3; +} + +#define do_smccc(...) \ +do { \ + if (get_current_el() == 2) \ + smccc_smc(__VA_ARGS__); \ + else \ + smccc_hvc(__VA_ARGS__); \ +} while (0) + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/arm64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h index 4ec801f37f00..2210d3d94c40 100644 --- a/tools/testing/selftests/kvm/include/arm64/ucall.h +++ b/tools/testing/selftests/kvm/include/arm64/ucall.h @@ -10,9 +10,9 @@ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each * VM), it must not be accessed from host code. */ -extern vm_vaddr_t *ucall_exit_mmio_addr; +extern gva_t *ucall_exit_mmio_addr; -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { WRITE_ONCE(*ucall_exit_mmio_addr, uc); } diff --git a/tools/testing/selftests/kvm/include/arm64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h index c481d0c00a5d..1f8b04373987 100644 --- a/tools/testing/selftests/kvm/include/arm64/vgic.h +++ b/tools/testing/selftests/kvm/include/arm64/vgic.h @@ -11,24 +11,27 @@ #include "kvm_util.h" #define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \ - (((uint64_t)(count) << 52) | \ - ((uint64_t)((base) >> 16) << 16) | \ - ((uint64_t)(flags) << 12) | \ + (((u64)(count) << 52) | \ + ((u64)((base) >> 16) << 16) | \ + ((u64)(flags) << 12) | \ index) -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); +bool kvm_supports_vgic_v3(void); +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs); +void __vgic_v3_init(int fd); +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs); #define VGIC_MAX_RESERVED 1023 -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level); +void kvm_irq_set_level_info(int gic_fd, u32 intid, int level); +int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level); -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); +void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level); +int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level); /* The vcpu arg only applies to private interrupts. */ -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h new file mode 100644 index 000000000000..067a4c9cf452 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_SYSCALLS_H +#define SELFTEST_KVM_SYSCALLS_H + +/* + * Include both the kernel and libc versions of mman.h. The kernel provides + * the most up-to-date flags and definitions, while libc provides the syscall + * wrappers tests expect. + */ +#include <linux/mman.h> + +#include <sys/mman.h> +#include <sys/syscall.h> + +#include <test_util.h> + +#define MAP_ARGS0(m,...) +#define MAP_ARGS1(m,t,a,...) m(t,a) +#define MAP_ARGS2(m,t,a,...) m(t,a), MAP_ARGS1(m,__VA_ARGS__) +#define MAP_ARGS3(m,t,a,...) m(t,a), MAP_ARGS2(m,__VA_ARGS__) +#define MAP_ARGS4(m,t,a,...) m(t,a), MAP_ARGS3(m,__VA_ARGS__) +#define MAP_ARGS5(m,t,a,...) m(t,a), MAP_ARGS4(m,__VA_ARGS__) +#define MAP_ARGS6(m,t,a,...) m(t,a), MAP_ARGS5(m,__VA_ARGS__) +#define MAP_ARGS(n,...) MAP_ARGS##n(__VA_ARGS__) + +#define __DECLARE_ARGS(t, a) t a +#define __UNPACK_ARGS(t, a) a + +#define DECLARE_ARGS(nr_args, args...) MAP_ARGS(nr_args, __DECLARE_ARGS, args) +#define UNPACK_ARGS(nr_args, args...) MAP_ARGS(nr_args, __UNPACK_ARGS, args) + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* Define a kvm_<syscall>() API to assert success. */ +#define __KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline void kvm_##name(DECLARE_ARGS(nr_args, args)) \ +{ \ + int r; \ + \ + r = name(UNPACK_ARGS(nr_args, args)); \ + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR(#name, r)); \ +} + +/* + * Macro to define syscall APIs, either because KVM selftests doesn't link to + * the standard library, e.g. libnuma, or because there is no library that yet + * provides the syscall. These + */ +#define KVM_SYSCALL_DEFINE(name, nr_args, args...) \ +static inline long name(DECLARE_ARGS(nr_args, args)) \ +{ \ + return syscall(__NR_##name, UNPACK_ARGS(nr_args, args)); \ +} \ +__KVM_SYSCALL_DEFINE(name, nr_args, args) + +/* + * Special case mmap(), as KVM selftest rarely/never specific an address, + * rarely specify an offset, and because the unique return code requires + * special handling anyways. + */ +static inline void *__kvm_mmap(size_t size, int prot, int flags, int fd, + off_t offset) +{ + void *mem; + + mem = mmap(NULL, size, prot, flags, fd, offset); + TEST_ASSERT(mem != MAP_FAILED, __KVM_SYSCALL_ERROR("mmap()", + (int)(unsigned long)MAP_FAILED)); + return mem; +} + +static inline void *kvm_mmap(size_t size, int prot, int flags, int fd) +{ + return __kvm_mmap(size, prot, flags, fd, 0); +} + +static inline int kvm_dup(int fd) +{ + int new_fd = dup(fd); + + TEST_ASSERT(new_fd >= 0, __KVM_SYSCALL_ERROR("dup()", new_fd)); + return new_fd; +} + +__KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size); +__KVM_SYSCALL_DEFINE(close, 1, int, fd); +__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len); +__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length); +__KVM_SYSCALL_DEFINE(madvise, 3, void *, addr, size_t, length, int, advice); + +#endif /* SELFTEST_KVM_SYSCALLS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 4c4e5a847f67..2ecaaa0e9965 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,8 +18,12 @@ #include <asm/atomic.h> #include <asm/kvm.h> +#include <sys/eventfd.h> #include <sys/ioctl.h> +#include <pthread.h> + +#include "kvm_syscalls.h" #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -46,18 +50,28 @@ struct userspace_mem_region { struct hlist_node slot_node; }; +struct kvm_binary_stats { + int fd; + struct kvm_stats_header header; + struct kvm_stats_desc *desc; +}; + struct kvm_vcpu { struct list_head list; - uint32_t id; + u32 id; int fd; struct kvm_vm *vm; struct kvm_run *run; #ifdef __x86_64__ struct kvm_cpuid2 *cpuid; #endif +#ifdef __aarch64__ + struct kvm_vcpu_init init; +#endif + struct kvm_binary_stats stats; struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; + u32 fetch_index; + u32 dirty_gfns_count; }; struct userspace_mem_regions { @@ -74,42 +88,51 @@ enum kvm_mem_region_type { NR_MEM_REGIONS, }; +struct kvm_mmu { + bool pgd_created; + u64 pgd; + int pgtable_levels; + + struct kvm_mmu_arch arch; +}; + struct kvm_vm { int mode; unsigned long type; int kvm_fd; int fd; - unsigned int pgtable_levels; unsigned int page_size; unsigned int page_shift; unsigned int pa_bits; unsigned int va_bits; - uint64_t max_gfn; + u64 max_gfn; struct list_head vcpus; struct userspace_mem_regions regions; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; bool has_irqchip; - bool pgd_created; - vm_paddr_t ucall_mmio_addr; - vm_paddr_t pgd; - vm_vaddr_t handlers; - uint32_t dirty_ring_size; - uint64_t gpa_tag_mask; + gpa_t ucall_mmio_addr; + gva_t handlers; + u32 dirty_ring_size; + gpa_t gpa_tag_mask; + + /* + * "mmu" is the guest's stage-1, with a short name because the vast + * majority of tests only care about the stage-1 MMU. + */ + struct kvm_mmu mmu; + struct kvm_mmu stage2_mmu; struct kvm_vm_arch arch; - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; + struct kvm_binary_stats stats; /* * KVM region slots. These are the default memslots used by page * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] * memslot. */ - uint32_t memslots[NR_MEM_REGIONS]; + u32 memslots[NR_MEM_REGIONS]; }; struct vcpu_reg_sublist { @@ -141,7 +164,7 @@ struct vcpu_reg_list { else struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); +memslot2region(struct kvm_vm *vm, u32 memslot); static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, enum kvm_mem_region_type type) @@ -167,24 +190,36 @@ enum vm_guest_mode { VM_MODE_P40V48_4K, VM_MODE_P40V48_16K, VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_PXXVYY_4K, /* For 48-bit or 57-bit VA, depending on host support */ VM_MODE_P47V64_4K, VM_MODE_P44V64_4K, VM_MODE_P36V48_4K, VM_MODE_P36V48_16K, VM_MODE_P36V48_64K, + VM_MODE_P47V47_16K, VM_MODE_P36V47_16K, + + VM_MODE_P56V57_4K, /* For riscv64 */ + VM_MODE_P56V48_4K, + VM_MODE_P56V39_4K, + VM_MODE_P50V57_4K, + VM_MODE_P50V48_4K, + VM_MODE_P50V39_4K, + VM_MODE_P41V57_4K, + VM_MODE_P41V48_4K, + VM_MODE_P41V39_4K, + NUM_VM_MODES, }; struct vm_shape { - uint32_t type; - uint8_t mode; - uint8_t pad0; - uint16_t pad1; + u32 type; + u8 mode; + u8 pad0; + u16 pad1; }; -kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); +kvm_static_assert(sizeof(struct vm_shape) == sizeof(u64)); #define VM_TYPE_DEFAULT 0 @@ -198,17 +233,17 @@ kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); shape; \ }) -#if defined(__aarch64__) - extern enum vm_guest_mode vm_mode_default; +#if defined(__aarch64__) + #define VM_MODE_DEFAULT vm_mode_default #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) #elif defined(__x86_64__) -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define VM_MODE_DEFAULT VM_MODE_PXXVYY_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) @@ -224,7 +259,12 @@ extern enum vm_guest_mode vm_mode_default; #error "RISC-V 32-bit kvm selftests not supported" #endif -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define VM_MODE_DEFAULT vm_mode_default +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__loongarch__) +#define VM_MODE_DEFAULT VM_MODE_P47V47_16K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 8) @@ -243,16 +283,22 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} + +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} unsigned int kvm_check_cap(long cap); @@ -261,9 +307,6 @@ static inline bool kvm_has_cap(long cap) return kvm_check_cap(cap); } -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - /* * Use the "inner", double-underscore macro when reporting errors from within * other macros so that the name of ioctl() and not its literal numeric value @@ -361,21 +404,22 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap) return ret; } -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +static inline int __vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) + +static inline void vm_enable_cap(struct kvm_vm *vm, u32 cap, u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } -static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, - uint64_t size, uint64_t attributes) +static inline void vm_set_memory_attributes(struct kvm_vm *vm, gpa_t gpa, + u64 size, u64 attributes) { struct kvm_memory_attributes attr = { .attributes = attributes, @@ -395,35 +439,35 @@ static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, } -static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_mem_set_private(struct kvm_vm *vm, gpa_t gpa, + u64 size) { vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); } -static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_mem_set_shared(struct kvm_vm *vm, gpa_t gpa, + u64 size) { vm_set_memory_attributes(vm, gpa, size, 0); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, gpa_t gpa, u64 size, bool punch_hole); -static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, gpa_t gpa, + u64 size) { vm_guest_mem_fallocate(vm, gpa, size, true); } -static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, gpa_t gpa, + u64 size) { vm_guest_mem_fallocate(vm, gpa, size, false); } -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); +void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size); +const char *vm_guest_mode_string(u32 i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); @@ -431,7 +475,7 @@ void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent); static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) { @@ -441,7 +485,7 @@ static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) } static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) + u64 first_page, u32 num_pages) { struct kvm_clear_dirty_log args = { .dirty_bitmap = log, @@ -453,14 +497,14 @@ static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); } -static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +static inline u32 kvm_vm_reset_dirty_ring(struct kvm_vm *vm) { return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, - uint64_t address, - uint64_t size, bool pio) + u64 address, + u64 size, bool pio) { struct kvm_coalesced_mmio_zone zone = { .addr = address, @@ -472,8 +516,8 @@ static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, } static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, - uint64_t address, - uint64_t size, bool pio) + u64 address, + u64 size, bool pio) { struct kvm_coalesced_mmio_zone zone = { .addr = address, @@ -492,6 +536,44 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd, + u32 flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd, u32 flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, u32 gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; @@ -528,24 +610,62 @@ static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc } void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, + struct kvm_stats_desc *desc, u64 *data, size_t max_elements); -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + u64 *data, size_t max_elements); -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) +#define __get_stat(stats, stat) \ +({ \ + u64 data; \ + \ + kvm_get_stat(stats, #stat, &data, 1); \ + data; \ +}) + +#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat) +#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat) + +static inline bool read_smt_control(char *buf, size_t buf_size) +{ + FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r"); + bool ret; + + if (!f) + return false; + + ret = fread(buf, sizeof(*buf), buf_size, f) > 0; + fclose(f); + + return ret; +} + +static inline bool is_smt_possible(void) +{ + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && + (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12))) + return false; + + return true; +} + +static inline bool is_smt_on(void) { - uint64_t data; + char buf[16]; + + if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2)) + return true; - __vm_get_stat(vm, stat_name, &data, 1); - return data; + return false; } void vm_create_irqchip(struct kvm_vm *vm); -static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, u64 size, + u64 flags) { struct kvm_create_guest_memfd guest_memfd = { .size = size, @@ -555,8 +675,8 @@ static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); } -static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) +static inline int vm_create_guest_memfd(struct kvm_vm *vm, u64 size, + u64 flags) { int fd = __vm_create_guest_memfd(vm, size, flags); @@ -564,24 +684,23 @@ static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, return fd; } -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); +void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva); +int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset); void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); + enum vm_mem_backing_src_type src_type, + gpa_t gpa, u32 slot, u64 npages, u32 flags); void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); + gpa_t gpa, u32 slot, u64 npages, u32 flags, + int guest_memfd_fd, u64 guest_memfd_offset); #ifndef vm_arch_has_protected_memory static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) @@ -590,35 +709,34 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) } #endif -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); - -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, +void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags); +void vm_mem_region_reload(struct kvm_vm *vm, u32 slot); +void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, u32 slot); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); +void vm_populate_gva_bitmap(struct kvm_vm *vm); +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva); +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva); +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type); +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type); +gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages); +gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type); +gva_t vm_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, gva_t gva); +gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa); #ifndef vcpu_arch_put_guest #define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0) #endif -static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) +static inline gpa_t vm_untag_gpa(struct kvm_vm *vm, gpa_t gpa) { return gpa & ~vm->gpa_tag_mask; } @@ -634,8 +752,8 @@ static inline int __vcpu_run(struct kvm_vcpu *vcpu) void vcpu_run_complete_io(struct kvm_vcpu *vcpu); struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); -static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, - uint64_t arg0) +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, u32 cap, + u64 arg0) { struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; @@ -690,31 +808,34 @@ static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); } -static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id, void *addr) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)addr }; return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); } -static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) + +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); } -static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id) + +static inline u64 vcpu_get_reg(struct kvm_vcpu *vcpu, u64 id) { - uint64_t val; - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + u64 val; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); return val; } -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) + +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) { - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + struct kvm_one_reg reg = { .id = id, .addr = (u64)&val }; TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id); @@ -759,75 +880,75 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) return fd; } -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); +int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr); -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +static inline void kvm_has_device_attr(int dev_fd, u32 group, u64 attr) { int ret = __kvm_has_device_attr(dev_fd, group, attr); TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); } -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); +int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val); -static inline void kvm_device_attr_get(int dev_fd, uint32_t group, - uint64_t attr, void *val) +static inline void kvm_device_attr_get(int dev_fd, u32 group, + u64 attr, void *val) { int ret = __kvm_device_attr_get(dev_fd, group, attr, val); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); +int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val); -static inline void kvm_device_attr_set(int dev_fd, uint32_t group, - uint64_t attr, void *val) +static inline void kvm_device_attr_set(int dev_fd, u32 group, + u64 attr, void *val) { int ret = __kvm_device_attr_set(dev_fd, group, attr, val); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); } -static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group, + u64 attr) { return __kvm_has_device_attr(vcpu->fd, group, attr); } -static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, u32 group, + u64 attr) { kvm_has_device_attr(vcpu->fd, group, attr); } -static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group, + u64 attr, void *val) { return __kvm_device_attr_get(vcpu->fd, group, attr, val); } -static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, u32 group, + u64 attr, void *val) { kvm_device_attr_get(vcpu->fd, group, attr, val); } -static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group, + u64 attr, void *val) { return __kvm_device_attr_set(vcpu->fd, group, attr, val); } -static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, u32 group, + u64 attr, void *val) { kvm_device_attr_set(vcpu->fd, group, attr, val); } -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); -int __kvm_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_test_create_device(struct kvm_vm *vm, u64 type); +int __kvm_create_device(struct kvm_vm *vm, u64 type); -static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +static inline int kvm_create_device(struct kvm_vm *vm, u64 type) { int fd = __kvm_create_device(vm, type); @@ -841,9 +962,9 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); * VM VCPU Args Set * * Input Args: - * vm - Virtual Machine + * vcpu - vCPU * num - number of arguments - * ... - arguments, each of type uint64_t + * ... - arguments, each of type u64 * * Output Args: None * @@ -851,40 +972,38 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); * * Sets the first @num input parameters for the function at @vcpu's entry point, * per the C calling convention of the architecture, to the values given as - * variable args. Each of the variable args is expected to be of type uint64_t. + * variable args. Each of the variable args is expected to be of type u64. * The maximum @num can be is specific to the architecture. */ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level); #define KVM_MAX_IRQ_ROUTES 4096 struct kvm_irq_routing *kvm_gsi_routing_create(void); void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin); + u32 gsi, u32 pin); int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); const char *exit_reason_str(unsigned int exit_reason); -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot); +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, gpa_t min_gpa, + u32 memslot, bool protected); +gpa_t vm_alloc_page_table(struct kvm_vm *vm); -static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot) +static inline gpa_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t min_gpa, u32 memslot) { /* * By default, allocate memory as protected for VMs that support * protected memory, as the majority of memory for such VMs is * protected, i.e. using shared memory is effectively opt-in. */ - return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, + return __vm_phy_pages_alloc(vm, num, min_gpa, memslot, vm_arch_has_protected_memory(vm)); } @@ -895,8 +1014,8 @@ static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ struct kvm_vm *____vm_create(struct vm_shape shape); -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages); +struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus, + u64 nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { @@ -913,16 +1032,16 @@ static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) return ____vm_create(shape); } -static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +static inline struct kvm_vm *vm_create(u32 nr_runnable_vcpus) { return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); } -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus, + u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); -static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, +static inline struct kvm_vm *vm_create_with_vcpus(u32 nr_vcpus, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -933,7 +1052,7 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code); /* @@ -941,7 +1060,7 @@ struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, * additional pages of guest memory. Returns the VM and vCPU (via out param). */ static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code) { return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, @@ -963,9 +1082,38 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_set_files_rlimit(u32 nr_vcpus); + +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], +void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[], int nr_vcpus); unsigned long vm_compute_max_gfn(struct kvm_vm *vm); @@ -977,20 +1125,16 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) { unsigned int n; n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); -#ifdef __s390x__ - /* s390 requires 1M aligned guest sizes */ - n = (n + 255) & ~255; -#endif return n; } #define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ memcpy(_p, &(g), sizeof(g)); \ }) #define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ memcpy(&(g), _p, sizeof(g)); \ }) @@ -1001,7 +1145,7 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) * undesirable to change the host's copy of the global. */ #define write_guest_global(vm, g, val) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) *_p = addr_gva2hva(vm, (gva_t)&(g)); \ typeof(g) _val = val; \ \ memcpy(_p, &(_val), sizeof(g)); \ @@ -1010,10 +1154,10 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent); + u8 indent); static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent) + u8 indent) { vcpu_arch_dump(stream, vcpu, indent); } @@ -1025,10 +1169,10 @@ static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, * vm - Virtual Machine * vcpu_id - The id of the VCPU to add to the VM. */ -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id); void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); -static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, void *guest_code) { struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); @@ -1039,10 +1183,10 @@ static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, } /* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id); static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) + u32 vcpu_id) { return vm_arch_vcpu_recreate(vm, vcpu_id); } @@ -1057,26 +1201,15 @@ static inline void virt_pgd_alloc(struct kvm_vm *vm) } /* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. + * at @gva to the page starting at @gpa. */ -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa); -static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +static inline void virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - virt_arch_pg_map(vm, vaddr, paddr); + virt_arch_pg_map(vm, gva, gpa); + sparsebit_set(vm->vpages_mapped, gva >> vm->page_shift); } @@ -1095,9 +1228,9 @@ static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr * Returns the VM physical address of the translated VM virtual * address given by @gva. */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva); -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +static inline gpa_t addr_gva2gpa(struct kvm_vm *vm, gva_t gva) { return addr_arch_gva2gpa(vm, gva); } @@ -1117,9 +1250,9 @@ static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) * Dumps to the FILE stream given by @stream, the contents of all the * virtual translation tables for the VM given by @vm. */ -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent); -static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { virt_arch_dump(stream, vm, indent); } @@ -1130,17 +1263,26 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); } +static inline u64 vm_page_align(struct kvm_vm *vm, u64 v) +{ + return (v + vm->page_size - 1) & ~(vm->page_size - 1); +} + /* - * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * Arch hook that is invoked via a constructor, i.e. before executing main(), * to allow for arch-specific setup that is common to all tests, e.g. computing * the default guest "mode". */ void kvm_selftest_arch_init(void); -void kvm_arch_vm_post_create(struct kvm_vm *vm); +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus); +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm); +void kvm_arch_vm_release(struct kvm_vm *vm); + +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa); -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); +u32 guest_get_vcpuid(void); -uint32_t guest_get_vcpuid(void); +bool kvm_arch_has_default_irqchip(void); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h index ec787b97cf18..ed0087e31674 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_types.h +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -2,6 +2,8 @@ #ifndef SELFTEST_KVM_UTIL_TYPES_H #define SELFTEST_KVM_UTIL_TYPES_H +#include <linux/types.h> + /* * Provide a version of static_assert() that is guaranteed to have an optional * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE @@ -14,7 +16,9 @@ #define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) #define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +typedef u64 gpa_t; /* Virtual Machine (Guest) physical address */ +typedef u64 gva_t; /* Virtual Machine (Guest) virtual address */ + +#define INVALID_GPA (~(u64)0) #endif /* SELFTEST_KVM_UTIL_TYPES_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/arch_timer.h b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h new file mode 100644 index 000000000000..3888aeeb3524 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/arch_timer.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * LoongArch Constant Timer specific interface + */ +#ifndef SELFTEST_KVM_ARCH_TIMER_H +#define SELFTEST_KVM_ARCH_TIMER_H + +#include "processor.h" + +/* LoongArch timer frequency is constant 100MHZ */ +#define TIMER_FREQ (100UL << 20) +#define msec_to_cycles(msec) (TIMER_FREQ * (unsigned long)(msec) / 1000) +#define usec_to_cycles(usec) (TIMER_FREQ * (unsigned long)(usec) / 1000000) +#define cycles_to_usec(cycles) ((unsigned long)(cycles) * 1000000 / TIMER_FREQ) + +static inline unsigned long timer_get_cycles(void) +{ + unsigned long val = 0; + + __asm__ __volatile__( + "rdtime.d %0, $zero\n\t" + : "=r"(val) + : + ); + + return val; +} + +static inline unsigned long timer_get_cfg(void) +{ + return csr_read(LOONGARCH_CSR_TCFG); +} + +static inline unsigned long timer_get_val(void) +{ + return csr_read(LOONGARCH_CSR_TVAL); +} + +static inline void disable_timer(void) +{ + csr_write(0, LOONGARCH_CSR_TCFG); +} + +static inline void timer_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_TIMER; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void timer_set_next_cmp_ms(unsigned int msec, bool period) +{ + unsigned long val; + + val = msec_to_cycles(msec) & CSR_TCFG_VAL; + val |= CSR_TCFG_EN; + if (period) + val |= CSR_TCFG_PERIOD; + csr_write(val, LOONGARCH_CSR_TCFG); +} + +static inline void __delay(u64 cycles) +{ + u64 start = timer_get_cycles(); + + while ((timer_get_cycles() - start) < cycles) + cpu_relax(); +} + +static inline void udelay(unsigned long usec) +{ + __delay(usec_to_cycles(usec)); +} +#endif /* SELFTEST_KVM_ARCH_TIMER_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h new file mode 100644 index 000000000000..d5095900e442 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_ARCH_H +#define SELFTEST_KVM_UTIL_ARCH_H + +struct kvm_mmu_arch {}; +struct kvm_vm_arch {}; + +#endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/loongarch/pmu.h b/tools/testing/selftests/kvm/include/loongarch/pmu.h new file mode 100644 index 000000000000..478e6a9bbb2b --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/pmu.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * LoongArch PMU specific interface + */ +#ifndef SELFTEST_KVM_PMU_H +#define SELFTEST_KVM_PMU_H + +#include "processor.h" + +#define LOONGARCH_CPUCFG6 0x6 +#define CPUCFG6_PMP BIT(0) +#define CPUCFG6_PAMVER GENMASK(3, 1) +#define CPUCFG6_PMNUM GENMASK(7, 4) +#define CPUCFG6_PMNUM_SHIFT 4 +#define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_PMBITS_SHIFT 8 +#define CPUCFG6_UPM BIT(14) + +/* Performance Counter registers */ +#define LOONGARCH_CSR_PERFCTRL0 0x200 /* perf event 0 config */ +#define LOONGARCH_CSR_PERFCNTR0 0x201 /* perf event 0 count value */ +#define LOONGARCH_CSR_PERFCTRL1 0x202 /* perf event 1 config */ +#define LOONGARCH_CSR_PERFCNTR1 0x203 /* perf event 1 count value */ +#define LOONGARCH_CSR_PERFCTRL2 0x204 /* perf event 2 config */ +#define LOONGARCH_CSR_PERFCNTR2 0x205 /* perf event 2 count value */ +#define LOONGARCH_CSR_PERFCTRL3 0x206 /* perf event 3 config */ +#define LOONGARCH_CSR_PERFCNTR3 0x207 /* perf event 3 count value */ +#define CSR_PERFCTRL_PLV0 BIT(16) +#define CSR_PERFCTRL_PLV1 BIT(17) +#define CSR_PERFCTRL_PLV2 BIT(18) +#define CSR_PERFCTRL_PLV3 BIT(19) +#define CSR_PERFCTRL_PMIE BIT(20) +#define PMU_ENVENT_ENABLED (CSR_PERFCTRL_PLV0 | CSR_PERFCTRL_PLV1 | CSR_PERFCTRL_PLV2 | CSR_PERFCTRL_PLV3) + +/* Hardware event codes (from LoongArch perf_event.c */ +#define LOONGARCH_PMU_EVENT_CYCLES 0x00 /* CPU cycles */ +#define LOONGARCH_PMU_EVENT_INSTR_RETIRED 0x01 /* Instructions retired */ +#define PERF_COUNT_HW_BRANCH_INSTRUCTIONS 0x02 /* Branch instructions */ +#define PERF_COUNT_HW_BRANCH_MISSES 0x03 /* Branch misses */ + +#define NUM_LOOPS 1000 +#define EXPECTED_INSTR_MIN (NUM_LOOPS + 10) /* Loop + overhead */ +#define EXPECTED_CYCLES_MIN NUM_LOOPS /* At least 1 cycle per iteration */ +#define UPPER_BOUND (10 * NUM_LOOPS) + +#define PMU_OVERFLOW (1ULL << 63) + +static inline void pmu_irq_enable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val |= ECFGF_PMU; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +static inline void pmu_irq_disable(void) +{ + unsigned long val; + + val = csr_read(LOONGARCH_CSR_ECFG); + val &= ~ECFGF_PMU; + csr_write(val, LOONGARCH_CSR_ECFG); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h new file mode 100644 index 000000000000..93dc1fbd2e79 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/processor.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#ifndef __ASSEMBLER__ +#include "ucall_common.h" + +#else +/* general registers */ +#define zero $r0 +#define ra $r1 +#define tp $r2 +#define sp $r3 +#define a0 $r4 +#define a1 $r5 +#define a2 $r6 +#define a3 $r7 +#define a4 $r8 +#define a5 $r9 +#define a6 $r10 +#define a7 $r11 +#define t0 $r12 +#define t1 $r13 +#define t2 $r14 +#define t3 $r15 +#define t4 $r16 +#define t5 $r17 +#define t6 $r18 +#define t7 $r19 +#define t8 $r20 +#define u0 $r21 +#define fp $r22 +#define s0 $r23 +#define s1 $r24 +#define s2 $r25 +#define s3 $r26 +#define s4 $r27 +#define s5 $r28 +#define s6 $r29 +#define s7 $r30 +#define s8 $r31 +#endif + +/* + * LoongArch page table entry definition + * Original header file arch/loongarch/include/asm/loongarch.h + */ +#define _PAGE_VALID_SHIFT 0 +#define _PAGE_DIRTY_SHIFT 1 +#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */ +#define PLV_KERN 0 +#define PLV_USER 3 +#define PLV_MASK 0x3 +#define _CACHE_SHIFT 4 /* 4~5, two bits */ +#define _PAGE_PRESENT_SHIFT 7 +#define _PAGE_WRITE_SHIFT 8 + +#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT) +#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT) +#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT) +#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT) +#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT) +#define __READABLE (_PAGE_VALID) +#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE) +/* Coherent Cached */ +#define _CACHE_CC BIT_ULL(_CACHE_SHIFT) +#define PS_4K 0x0000000c +#define PS_16K 0x0000000e +#define PS_64K 0x00000010 +#define PS_DEFAULT_SIZE PS_16K + +/* LoongArch Basic CSR registers */ +#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */ +#define CSR_CRMD_PG_SHIFT 4 +#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT) +#define CSR_CRMD_IE_SHIFT 2 +#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT) +#define CSR_CRMD_PLV_SHIFT 0 +#define CSR_CRMD_PLV_WIDTH 2 +#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT) +#define PLV_MASK 0x3 +#define LOONGARCH_CSR_PRMD 0x1 +#define LOONGARCH_CSR_EUEN 0x2 +#define LOONGARCH_CSR_ECFG 0x4 +#define ECFGB_PMU 10 +#define ECFGF_PMU (BIT_ULL(ECFGB_PMU)) +#define ECFGB_TIMER 11 +#define ECFGF_TIMER (BIT_ULL(ECFGB_TIMER)) +#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */ +#define CSR_ESTAT_EXC_SHIFT 16 +#define CSR_ESTAT_EXC_WIDTH 6 +#define CSR_ESTAT_EXC (0x3f << CSR_ESTAT_EXC_SHIFT) +#define EXCCODE_INT 0 /* Interrupt */ +#define INT_PMI 10 /* PMU interrupt */ +#define INT_TI 11 /* Timer interrupt*/ +#define LOONGARCH_CSR_ERA 0x6 /* ERA */ +#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */ +#define LOONGARCH_CSR_EENTRY 0xc +#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */ +#define CSR_TLBIDX_PS_SHIFT 24 +#define CSR_TLBIDX_PS_WIDTH 6 +#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT) +#define CSR_TLBIDX_SIZEM 0x3f000000 +#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT +#define LOONGARCH_CSR_ASID 0x18 /* ASID */ +#define LOONGARCH_CSR_PGDL 0x19 +#define LOONGARCH_CSR_PGDH 0x1a +/* Page table base */ +#define LOONGARCH_CSR_PGD 0x1b +#define LOONGARCH_CSR_PWCTL0 0x1c +#define LOONGARCH_CSR_PWCTL1 0x1d +#define LOONGARCH_CSR_STLBPGSIZE 0x1e +#define LOONGARCH_CSR_CPUID 0x20 +#define LOONGARCH_CSR_KS0 0x30 +#define LOONGARCH_CSR_KS1 0x31 +#define LOONGARCH_CSR_TMID 0x40 +#define LOONGARCH_CSR_TCFG 0x41 +#define CSR_TCFG_VAL (BIT_ULL(48) - BIT_ULL(2)) +#define CSR_TCFG_PERIOD_SHIFT 1 +#define CSR_TCFG_PERIOD (0x1UL << CSR_TCFG_PERIOD_SHIFT) +#define CSR_TCFG_EN (0x1UL) +#define LOONGARCH_CSR_TVAL 0x42 +#define LOONGARCH_CSR_TINTCLR 0x44 /* Timer interrupt clear */ +#define CSR_TINTCLR_TI_SHIFT 0 +#define CSR_TINTCLR_TI (1 << CSR_TINTCLR_TI_SHIFT) +/* TLB refill exception entry */ +#define LOONGARCH_CSR_TLBRENTRY 0x88 +#define LOONGARCH_CSR_TLBRSAVE 0x8b +#define LOONGARCH_CSR_TLBREHI 0x8e +#define CSR_TLBREHI_PS_SHIFT 0 +#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT) + +#define read_cpucfg(reg) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "cpucfg %0, %1\n\t" \ + : "=r" (__v) \ + : "r" (reg) \ + : "memory"); \ + __v; \ +}) + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__( \ + "csrrd %[val], %[reg]\n\t" \ + : [val] "=r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define csr_write(v, csr) \ +({ \ + register unsigned long __v = v; \ + __asm__ __volatile__ ( \ + "csrwr %[val], %[reg]\n\t" \ + : [val] "+r" (__v) \ + : [reg] "i" (csr) \ + : "memory"); \ + __v; \ +}) + +#define EXREGS_GPRS (32) + +#ifndef __ASSEMBLER__ +void handle_tlb_refill(void); +void handle_exception(void); + +struct ex_regs { + unsigned long regs[EXREGS_GPRS]; + unsigned long pc; + unsigned long estat; + unsigned long badv; + unsigned long prmd; +}; + +#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc) +#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat) +#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv) +#define PRMD_OFFSET_EXREGS offsetof(struct ex_regs, prmd) +#define EXREGS_SIZE sizeof(struct ex_regs) + +#define VECTOR_NUM 64 + +typedef void(*handler_fn)(struct ex_regs *); + +struct handlers { + handler_fn exception_handlers[VECTOR_NUM]; +}; + +void loongarch_vcpu_setup(struct kvm_vcpu *vcpu); +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler); + +static inline void cpu_relax(void) +{ + asm volatile("nop" ::: "memory"); +} + +static inline void local_irq_enable(void) +{ + unsigned int flags = CSR_CRMD_IE; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} + +static inline void local_irq_disable(void) +{ + unsigned int flags = 0; + register unsigned int mask asm("$t0") = CSR_CRMD_IE; + + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) + : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); +} +#else +#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8) +#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8) +#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8) +#define PRMD_OFFSET_EXREGS ((EXREGS_GPRS + 3) * 8) +#define EXREGS_SIZE ((EXREGS_GPRS + 4) * 8) +#endif + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h new file mode 100644 index 000000000000..2210d3d94c40 --- /dev/null +++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern gva_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(gva_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h new file mode 100644 index 000000000000..d32ff5d8ffd0 --- /dev/null +++ b/tools/testing/selftests/kvm/include/lru_gen_util.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output. + * + * Copyright (C) 2025, Google LLC. + */ +#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H +#define SELFTEST_KVM_LRU_GEN_UTIL_H + +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> + +#include "test_util.h" + +#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */ +#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */ + +#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen" +#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled" +#define LRU_GEN_ENABLED 1 +#define LRU_GEN_MM_WALK 2 + +struct generation_stats { + int gen; + long age_ms; + long nr_anon; + long nr_file; +}; + +struct node_stats { + int node; + int nr_gens; /* Number of populated gens entries. */ + struct generation_stats gens[MAX_NR_GENS]; +}; + +struct memcg_stats { + unsigned long memcg_id; + int nr_nodes; /* Number of populated nodes entries. */ + struct node_stats nodes[MAX_NR_NODES]; +}; + +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg); +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats); +long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats); +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg); +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long total_pages); +bool lru_gen_usable(void); + +#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index 9071eb6dea60..0d1d6230cc05 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -20,9 +20,9 @@ #define MEMSTRESS_MEM_SLOT_INDEX 1 struct memstress_vcpu_args { - uint64_t gpa; - uint64_t gva; - uint64_t pages; + gpa_t gpa; + gva_t gva; + u64 pages; /* Only used by the host userspace part of the vCPU thread */ struct kvm_vcpu *vcpu; @@ -32,11 +32,11 @@ struct memstress_vcpu_args { struct memstress_args { struct kvm_vm *vm; /* The starting address and size of the guest test region. */ - uint64_t gpa; - uint64_t size; - uint64_t guest_page_size; - uint32_t random_seed; - uint32_t write_percent; + gpa_t gpa; + u64 size; + u64 guest_page_size; + u32 random_seed; + u32 write_percent; /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ bool nested; @@ -45,7 +45,7 @@ struct memstress_args { /* True if all vCPUs are pinned to pCPUs */ bool pin_vcpus; /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */ - uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS]; + u32 vcpu_to_pcpu[KVM_MAX_VCPUS]; /* Test is done, stop running vCPUs. */ bool stop_vcpus; @@ -56,27 +56,27 @@ struct memstress_args { extern struct memstress_args memstress_args; struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, + u64 vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access); void memstress_destroy_vm(struct kvm_vm *vm); -void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); +void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent); void memstress_set_random_access(struct kvm_vm *vm, bool random_access); void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); void memstress_join_vcpu_threads(int vcpus); -void memstress_guest_code(uint32_t vcpu_id); +void memstress_guest_code(u32 vcpu_id); -uint64_t memstress_nested_pages(int nr_vcpus); +u64 memstress_nested_pages(int nr_vcpus); void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots); void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots); void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots); void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot); -unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot); + int slots, u64 pages_per_slot); +unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot); void memstress_free_bitmaps(unsigned long *bitmaps[], int slots); #endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h index b020547403fd..29572a6d789c 100644 --- a/tools/testing/selftests/kvm/include/numaif.h +++ b/tools/testing/selftests/kvm/include/numaif.h @@ -1,55 +1,83 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/numaif.h - * - * Copyright (C) 2020, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Header file that provides access to NUMA API functions not explicitly - * exported to user space. - */ +/* Copyright (C) 2020, Google LLC. */ #ifndef SELFTEST_KVM_NUMAIF_H #define SELFTEST_KVM_NUMAIF_H -#define __NR_get_mempolicy 239 -#define __NR_migrate_pages 256 +#include <dirent.h> -/* System calls */ -long get_mempolicy(int *policy, const unsigned long *nmask, - unsigned long maxnode, void *addr, int flags) +#include <linux/mempolicy.h> + +#include "kvm_syscalls.h" + +KVM_SYSCALL_DEFINE(get_mempolicy, 5, int *, policy, const unsigned long *, nmask, + unsigned long, maxnode, void *, addr, int, flags); + +KVM_SYSCALL_DEFINE(set_mempolicy, 3, int, mode, const unsigned long *, nmask, + unsigned long, maxnode); + +KVM_SYSCALL_DEFINE(set_mempolicy_home_node, 4, unsigned long, start, + unsigned long, len, unsigned long, home_node, + unsigned long, flags); + +KVM_SYSCALL_DEFINE(migrate_pages, 4, int, pid, unsigned long, maxnode, + const unsigned long *, frommask, const unsigned long *, tomask); + +KVM_SYSCALL_DEFINE(move_pages, 6, int, pid, unsigned long, count, void *, pages, + const int *, nodes, int *, status, int, flags); + +KVM_SYSCALL_DEFINE(mbind, 6, void *, addr, unsigned long, size, int, mode, + const unsigned long *, nodemask, unsigned long, maxnode, + unsigned int, flags); + +static inline int get_max_numa_node(void) { - return syscall(__NR_get_mempolicy, policy, nmask, - maxnode, addr, flags); + struct dirent *de; + int max_node = 0; + DIR *d; + + /* + * Assume there's a single node if the kernel doesn't support NUMA, + * or if no nodes are found. + */ + d = opendir("/sys/devices/system/node"); + if (!d) + return 0; + + while ((de = readdir(d)) != NULL) { + int node_id; + char *endptr; + + if (strncmp(de->d_name, "node", 4) != 0) + continue; + + node_id = strtol(de->d_name + 4, &endptr, 10); + if (*endptr != '\0') + continue; + + if (node_id > max_node) + max_node = node_id; + } + closedir(d); + + return max_node; } -long migrate_pages(int pid, unsigned long maxnode, - const unsigned long *frommask, - const unsigned long *tomask) +static bool is_numa_available(void) { - return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask); + /* + * Probe for NUMA by doing a dummy get_mempolicy(). If the syscall + * fails with ENOSYS, then the kernel was built without NUMA support. + * if the syscall fails with EPERM, then the process/user lacks the + * necessary capabilities (CAP_SYS_NICE). + */ + return !get_mempolicy(NULL, NULL, 0, NULL, 0) || + (errno != ENOSYS && errno != EPERM); } -/* Policies */ -#define MPOL_DEFAULT 0 -#define MPOL_PREFERRED 1 -#define MPOL_BIND 2 -#define MPOL_INTERLEAVE 3 - -#define MPOL_MAX MPOL_INTERLEAVE - -/* Flags for get_mem_policy */ -#define MPOL_F_NODE (1<<0) /* return next il node or node of address */ - /* Warning: MPOL_F_NODE is unsupported and - * subject to change. Don't use. - */ -#define MPOL_F_ADDR (1<<1) /* look up vma using address */ -#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */ - -/* Flags for mbind */ -#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ +static inline bool is_multi_numa_node_system(void) +{ + return is_numa_available() && get_max_numa_node() >= 1; +} #endif /* SELFTEST_KVM_NUMAIF_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h index 225d81dad064..28ffc014da2a 100644 --- a/tools/testing/selftests/kvm/include/riscv/arch_timer.h +++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h @@ -14,25 +14,25 @@ static unsigned long timer_freq; #define msec_to_cycles(msec) \ - ((timer_freq) * (uint64_t)(msec) / 1000) + ((timer_freq) * (u64)(msec) / 1000) #define usec_to_cycles(usec) \ - ((timer_freq) * (uint64_t)(usec) / 1000000) + ((timer_freq) * (u64)(usec) / 1000000) #define cycles_to_usec(cycles) \ - ((uint64_t)(cycles) * 1000000 / (timer_freq)) + ((u64)(cycles) * 1000000 / (timer_freq)) -static inline uint64_t timer_get_cycles(void) +static inline u64 timer_get_cycles(void) { return csr_read(CSR_TIME); } -static inline void timer_set_cmp(uint64_t cval) +static inline void timer_set_cmp(u64 cval) { csr_write(CSR_STIMECMP, cval); } -static inline uint64_t timer_get_cmp(void) +static inline u64 timer_get_cmp(void) { return csr_read(CSR_STIMECMP); } @@ -47,17 +47,17 @@ static inline void timer_irq_disable(void) csr_clear(CSR_SIE, IE_TIE); } -static inline void timer_set_next_cmp_ms(uint32_t msec) +static inline void timer_set_next_cmp_ms(u32 msec) { - uint64_t now_ct = timer_get_cycles(); - uint64_t next_ct = now_ct + msec_to_cycles(msec); + u64 now_ct = timer_get_cycles(); + u64 next_ct = now_ct + msec_to_cycles(msec); timer_set_cmp(next_ct); } -static inline void __delay(uint64_t cycles) +static inline void __delay(u64 cycles) { - uint64_t start = timer_get_cycles(); + u64 start = timer_get_cycles(); while ((timer_get_cycles() - start) < cycles) cpu_relax(); diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h index e43a57d99b56..d5095900e442 100644 --- a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h @@ -2,6 +2,7 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; struct kvm_vm_arch {}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 5f389166338c..e3acf2ae9881 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -9,10 +9,23 @@ #include <linux/stringify.h> #include <asm/csr.h> +#include <asm/vdso/processor.h> #include "kvm_util.h" -static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, - uint64_t idx, uint64_t size) +#define INSN_OPCODE_MASK 0x007c +#define INSN_OPCODE_SHIFT 2 +#define INSN_OPCODE_SYSTEM 28 + +#define INSN_MASK_FUNCT3 0x7000 +#define INSN_SHIFT_FUNCT3 12 + +#define INSN_CSR_MASK 0xfff00000 +#define INSN_CSR_SHIFT 20 + +#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3) +#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT) + +static inline u64 __kvm_reg_id(u64 type, u64 subtype, u64 idx, u64 size) { return KVM_REG_RISCV | type | subtype | idx | size; } @@ -48,19 +61,20 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, KVM_REG_RISCV_SBI_SINGLE, \ idx, KVM_REG_SIZE_ULONG) -bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext); +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext); -static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext) +static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, u64 isa_ext) { return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext)); } -static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) +static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, u64 sbi_ext) { return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); } -struct ex_regs { +struct pt_regs { + unsigned long epc; unsigned long ra; unsigned long sp; unsigned long gp; @@ -92,16 +106,19 @@ struct ex_regs { unsigned long t4; unsigned long t5; unsigned long t6; - unsigned long epc; + /* Supervisor/Machine CSRs */ unsigned long status; + unsigned long badaddr; unsigned long cause; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #define NR_VECTORS 2 #define NR_EXCEPTIONS 32 #define EC_MASK (NR_EXCEPTIONS - 1) -typedef void(*exception_handler_fn)(struct ex_regs *); +typedef void(*exception_handler_fn)(struct pt_regs *); void vm_init_vector_tables(struct kvm_vm *vm); void vcpu_init_vector_tables(struct kvm_vcpu *vcpu); @@ -174,4 +191,6 @@ static inline void local_irq_disable(void) csr_clear(CSR_SSTATUS, SR_SIE); } +unsigned long riscv64_get_satp_mode(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h index 046b432ae896..16f1815ac48f 100644 --- a/tools/testing/selftests/kvm/include/riscv/sbi.h +++ b/tools/testing/selftests/kvm/include/riscv/sbi.h @@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t { SBI_PMU_HW_GENERAL_MAX, }; +enum sbi_pmu_fw_generic_events_t { + SBI_PMU_FW_MISALIGNED_LOAD = 0, + SBI_PMU_FW_MISALIGNED_STORE = 1, + SBI_PMU_FW_ACCESS_LOAD = 2, + SBI_PMU_FW_ACCESS_STORE = 3, + SBI_PMU_FW_ILLEGAL_INSN = 4, + SBI_PMU_FW_SET_TIMER = 5, + SBI_PMU_FW_IPI_SENT = 6, + SBI_PMU_FW_IPI_RCVD = 7, + SBI_PMU_FW_FENCE_I_SENT = 8, + SBI_PMU_FW_FENCE_I_RCVD = 9, + SBI_PMU_FW_SFENCE_VMA_SENT = 10, + SBI_PMU_FW_SFENCE_VMA_RCVD = 11, + SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, + SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13, + + SBI_PMU_FW_HFENCE_GVMA_SENT = 14, + SBI_PMU_FW_HFENCE_GVMA_RCVD = 15, + SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16, + SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17, + + SBI_PMU_FW_HFENCE_VVMA_SENT = 18, + SBI_PMU_FW_HFENCE_VVMA_RCVD = 19, + SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20, + SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21, + SBI_PMU_FW_MAX, +}; + +/* SBI PMU event types */ +enum sbi_pmu_event_type { + SBI_PMU_EVENT_TYPE_HW = 0x0, + SBI_PMU_EVENT_TYPE_CACHE = 0x1, + SBI_PMU_EVENT_TYPE_RAW = 0x2, + SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3, + SBI_PMU_EVENT_TYPE_FW = 0xf, +}; + /* SBI PMU counter types */ enum sbi_pmu_ctr_type { SBI_PMU_CTR_TYPE_HW = 0x0, diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h index a695ae36f3e0..2de7c6a36096 100644 --- a/tools/testing/selftests/kvm/include/riscv/ucall.h +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -7,11 +7,11 @@ #define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, KVM_RISCV_SELFTESTS_SBI_UCALL, diff --git a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h index b0ed71302722..6deaf18fec22 100644 --- a/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h +++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h @@ -8,6 +8,6 @@ #ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER #define SELFTEST_KVM_DIAG318_TEST_HANDLER -uint64_t get_diag318_info(void); +u64 get_diag318_info(void); #endif diff --git a/tools/testing/selftests/kvm/include/s390/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h index 00a1ced6538b..41a265742666 100644 --- a/tools/testing/selftests/kvm/include/s390/facility.h +++ b/tools/testing/selftests/kvm/include/s390/facility.h @@ -16,7 +16,7 @@ /* alt_stfle_fac_list[16] + stfle_fac_list[16] */ #define NB_STFL_DOUBLEWORDS 32 -extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern u64 stfl_doublewords[NB_STFL_DOUBLEWORDS]; extern bool stfle_flag; static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) @@ -24,7 +24,7 @@ static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); } -static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +static inline void stfle(u64 *fac, unsigned int nb_doublewords) { register unsigned long r0 asm("0") = nb_doublewords - 1; diff --git a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h index e43a57d99b56..d5095900e442 100644 --- a/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h @@ -2,6 +2,7 @@ #ifndef SELFTEST_KVM_UTIL_ARCH_H #define SELFTEST_KVM_UTIL_ARCH_H +struct kvm_mmu_arch {}; struct kvm_vm_arch {}; #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/s390/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h index 8035a872a351..3907d629304f 100644 --- a/tools/testing/selftests/kvm/include/s390/ucall.h +++ b/tools/testing/selftests/kvm/include/s390/ucall.h @@ -6,11 +6,11 @@ #define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } -static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +static inline void ucall_arch_do_ucall(gva_t uc) { /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h index bc760761e1a3..e027e5790946 100644 --- a/tools/testing/selftests/kvm/include/sparsebit.h +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -6,7 +6,7 @@ * * Header file that describes API to the sparsebit library. * This library provides a memory efficient means of storing - * the settings of bits indexed via a uint64_t. Memory usage + * the settings of bits indexed via a u64. Memory usage * is reasonable, significantly less than (2^64 / 8) bytes, as * long as bits that are mostly set or mostly cleared are close * to each other. This library is efficient in memory usage @@ -25,8 +25,8 @@ extern "C" { #endif struct sparsebit; -typedef uint64_t sparsebit_idx_t; -typedef uint64_t sparsebit_num_t; +typedef u64 sparsebit_idx_t; +typedef u64 sparsebit_num_t; struct sparsebit *sparsebit_alloc(void); void sparsebit_free(struct sparsebit **sbitp); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 3e473058849f..a56271c237ae 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -8,6 +8,8 @@ #ifndef SELFTEST_KVM_TEST_UTIL_H #define SELFTEST_KVM_TEST_UTIL_H +#include <setjmp.h> +#include <signal.h> #include <stdlib.h> #include <stdarg.h> #include <stdbool.h> @@ -17,12 +19,14 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> -#include <sys/mman.h> #include "kselftest.h" +#include <linux/mman.h> +#include <linux/types.h> + #define msecs_to_usecs(msec) ((msec) * 1000ULL) -static inline int _no_printf(const char *format, ...) { return 0; } +static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; } #ifdef DEBUG #define pr_debug(...) printf(__VA_ARGS__) @@ -78,27 +82,44 @@ do { \ __builtin_unreachable(); \ } while (0) +extern sigjmp_buf expect_sigbus_jmpbuf; +void expect_sigbus_handler(int signum); + +#define TEST_EXPECT_SIGBUS(action) \ +do { \ + struct sigaction sa_old, sa_new = { \ + .sa_handler = expect_sigbus_handler, \ + }; \ + \ + sigaction(SIGBUS, &sa_new, &sa_old); \ + if (sigsetjmp(expect_sigbus_jmpbuf, 1) == 0) { \ + action; \ + TEST_FAIL("'%s' should have triggered SIGBUS", #action); \ + } \ + sigaction(SIGBUS, &sa_old, NULL); \ +} while (0) + size_t parse_size(const char *size); -int64_t timespec_to_ns(struct timespec ts); -struct timespec timespec_add_ns(struct timespec ts, int64_t ns); +s64 timespec_to_ns(struct timespec ts); +struct timespec timespec_add_ns(struct timespec ts, s64 ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); struct timespec timespec_elapsed(struct timespec start); struct timespec timespec_div(struct timespec ts, int divisor); struct guest_random_state { - uint32_t seed; + u32 seed; }; -extern uint32_t guest_random_seed; +extern u32 guest_random_seed; extern struct guest_random_state guest_rng; -struct guest_random_state new_guest_random_state(uint32_t seed); -uint32_t guest_random_u32(struct guest_random_state *state); +struct guest_random_state new_guest_random_state(u32 seed); +u32 guest_random_u32(struct guest_random_state *state); static inline bool __guest_random_bool(struct guest_random_state *state, - uint8_t percent) + u8 percent) { return (guest_random_u32(state) % 100) < percent; } @@ -108,9 +129,9 @@ static inline bool guest_random_bool(struct guest_random_state *state) return __guest_random_bool(state, 50); } -static inline uint64_t guest_random_u64(struct guest_random_state *state) +static inline u64 guest_random_u64(struct guest_random_state *state) { - return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state); + return ((u64)guest_random_u32(state) << 32) | guest_random_u32(state); } enum vm_mem_backing_src_type { @@ -139,7 +160,7 @@ enum vm_mem_backing_src_type { struct vm_mem_backing_src_alias { const char *name; - uint32_t flag; + u32 flag; }; #define MIN_RUN_DELAY_NS 200000UL @@ -147,12 +168,13 @@ struct vm_mem_backing_src_alias { bool thp_configured(void); size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); -const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); -size_t get_backing_src_pagesz(uint32_t i); -bool is_backing_src_hugetlb(uint32_t i); +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i); +size_t get_backing_src_pagesz(u32 i); +bool is_backing_src_hugetlb(u32 i); void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); long get_run_delay(void); +bool is_numa_balancing_enabled(void); /* * Whether or not the given source type is shared memory (as opposed to @@ -169,18 +191,18 @@ static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) } /* Aligns x up to the next multiple of size. Size must be a power of 2. */ -static inline uint64_t align_up(uint64_t x, uint64_t size) +static inline u64 align_up(u64 x, u64 size) { - uint64_t mask = size - 1; + u64 mask = size - 1; TEST_ASSERT(size != 0 && !(size & (size - 1)), "size not a power of 2: %lu", size); return ((x + mask) & ~mask); } -static inline uint64_t align_down(uint64_t x, uint64_t size) +static inline u64 align_down(u64 x, u64 size) { - uint64_t x_aligned_up = align_up(x, size); + u64 x_aligned_up = align_up(x, size); if (x == x_aligned_up) return x; @@ -195,7 +217,7 @@ static inline void *align_ptr_up(void *x, size_t size) int atoi_paranoid(const char *num_str); -static inline uint32_t atoi_positive(const char *name, const char *num_str) +static inline u32 atoi_positive(const char *name, const char *num_str) { int num = atoi_paranoid(num_str); @@ -203,7 +225,7 @@ static inline uint32_t atoi_positive(const char *name, const char *num_str) return num; } -static inline uint32_t atoi_non_negative(const char *name, const char *num_str) +static inline u32 atoi_non_negative(const char *name, const char *num_str) { int num = atoi_paranoid(num_str); diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h index 9b6edaafe6d4..b7d5d2c84701 100644 --- a/tools/testing/selftests/kvm/include/timer_test.h +++ b/tools/testing/selftests/kvm/include/timer_test.h @@ -18,21 +18,21 @@ /* Timer test cmdline parameters */ struct test_args { - uint32_t nr_vcpus; - uint32_t nr_iter; - uint32_t timer_period_ms; - uint32_t migration_freq_ms; - uint32_t timer_err_margin_us; + u32 nr_vcpus; + u32 nr_iter; + u32 timer_period_ms; + u32 migration_freq_ms; + u32 timer_err_margin_us; /* Members of struct kvm_arm_counter_offset */ - uint64_t counter_offset; - uint64_t reserved; + u64 counter_offset; + u64 reserved; }; /* Shared variables between host and guest */ struct test_vcpu_shared_data { - uint32_t nr_iter; + u32 nr_iter; int guest_stage; - uint64_t xcnt; + u64 xcnt; }; extern struct test_args test_args; diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index d9d6581b8d4f..cbdcb0a50c4f 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -21,26 +21,26 @@ enum { #define UCALL_BUFFER_LEN 1024 struct ucall { - uint64_t cmd; - uint64_t args[UCALL_MAX_ARGS]; + u64 cmd; + u64 args[UCALL_MAX_ARGS]; char buffer[UCALL_BUFFER_LEN]; /* Host virtual address of this struct. */ struct ucall *hva; }; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); -void ucall_arch_do_ucall(vm_vaddr_t uc); +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa); +void ucall_arch_do_ucall(gva_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); -void ucall(uint64_t cmd, int nargs, ...); -__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...); -__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, +void ucall(u64 cmd, int nargs, ...); +__printf(2, 3) void ucall_fmt(u64 cmd, const char *fmt, ...); +__printf(5, 6) void ucall_assert(u64 cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...); -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); -void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); -int ucall_nr_pages_required(uint64_t page_size); +u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); +void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa); +int ucall_nr_pages_required(u64 page_size); /* * Perform userspace call without any associated data. This bare call avoids @@ -48,7 +48,7 @@ int ucall_nr_pages_required(uint64_t page_size); * the full ucall() are problematic and/or unwanted. Note, this will come out * as UCALL_NONE on the backend. */ -#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL) +#define GUEST_UCALL_NONE() ucall_arch_do_ucall((gva_t)NULL) #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h index 60f7f9d435dc..0bc1dc16600e 100644 --- a/tools/testing/selftests/kvm/include/userfaultfd_util.h +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -25,7 +25,7 @@ struct uffd_reader_args { struct uffd_desc { int uffd; - uint64_t num_readers; + u64 num_readers; /* Holds the write ends of the pipes for killing the readers. */ int *pipefds; pthread_t *readers; @@ -33,8 +33,8 @@ struct uffd_desc { }; struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, - void *hva, uint64_t len, - uint64_t num_readers, + void *hva, u64 len, + u64 num_readers, uffd_handler_t handler); void uffd_stop_demand_paging(struct uffd_desc *uffd); diff --git a/tools/testing/selftests/kvm/include/x86/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h index 80fe9f69b38d..31887bdc3d6c 100644 --- a/tools/testing/selftests/kvm/include/x86/apic.h +++ b/tools/testing/selftests/kvm/include/x86/apic.h @@ -28,10 +28,13 @@ #define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF) #define APIC_TASKPRI 0x80 #define APIC_PROCPRI 0xA0 +#define GET_APIC_PRI(x) (((x) & GENMASK(7, 4)) >> 4) +#define SET_APIC_PRI(x, y) (((x) & ~GENMASK(7, 4)) | (y << 4)) #define APIC_EOI 0xB0 #define APIC_SPIV 0xF0 #define APIC_SPIV_FOCUS_DISABLED (1 << 9) #define APIC_SPIV_APIC_ENABLED (1 << 8) +#define APIC_ISR 0x100 #define APIC_IRR 0x200 #define APIC_ICR 0x300 #define APIC_LVTCMCI 0x2f0 @@ -67,47 +70,51 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_SELF_IPI 0x3F0 + +#define APIC_VECTOR_TO_BIT_NUMBER(v) ((unsigned int)(v) % 32) +#define APIC_VECTOR_TO_REG_OFFSET(v) ((unsigned int)(v) / 32 * 0x10) void apic_disable(void); void xapic_enable(void); void x2apic_enable(void); -static inline uint32_t get_bsp_flag(void) +static inline u32 get_bsp_flag(void) { return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP; } -static inline uint32_t xapic_read_reg(unsigned int reg) +static inline u32 xapic_read_reg(unsigned int reg) { - return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2]; + return ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2]; } -static inline void xapic_write_reg(unsigned int reg, uint32_t val) +static inline void xapic_write_reg(unsigned int reg, u32 val) { - ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val; + ((volatile u32 *)APIC_DEFAULT_GPA)[reg >> 2] = val; } -static inline uint64_t x2apic_read_reg(unsigned int reg) +static inline u64 x2apic_read_reg(unsigned int reg) { return rdmsr(APIC_BASE_MSR + (reg >> 4)); } -static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +static inline u8 x2apic_write_reg_safe(unsigned int reg, u64 value) { return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); } -static inline void x2apic_write_reg(unsigned int reg, uint64_t value) +static inline void x2apic_write_reg(unsigned int reg, u64 value) { - uint8_t fault = x2apic_write_reg_safe(reg, value); + u8 fault = x2apic_write_reg_safe(reg, value); __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", fault, APIC_BASE_MSR + (reg >> 4), value); } -static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +static inline void x2apic_write_reg_fault(unsigned int reg, u64 value) { - uint8_t fault = x2apic_write_reg_safe(reg, value); + u8 fault = x2apic_write_reg_safe(reg, value); __GUEST_ASSERT(fault == GP_VECTOR, "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", diff --git a/tools/testing/selftests/kvm/include/x86/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h index 5a74bb30e2f8..be79bda024bf 100644 --- a/tools/testing/selftests/kvm/include/x86/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86/evmcs.h @@ -10,9 +10,9 @@ #include "hyperv.h" #include "vmx.h" -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t +#define u16 u16 +#define u32 u32 +#define u64 u64 #define EVMCS_VERSION 1 @@ -245,7 +245,7 @@ static inline void evmcs_enable(void) enable_evmcs = true; } -static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) +static inline int evmcs_vmptrld(u64 vmcs_pa, void *vmcs) { current_vp_assist->current_nested_vmcs = vmcs_pa; current_vp_assist->enlighten_vmentry = 1; @@ -265,7 +265,7 @@ static inline bool load_evmcs(struct hyperv_test_pages *hv) return true; } -static inline int evmcs_vmptrst(uint64_t *value) +static inline int evmcs_vmptrst(u64 *value) { *value = current_vp_assist->current_nested_vmcs & ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; @@ -273,7 +273,7 @@ static inline int evmcs_vmptrst(uint64_t *value) return 0; } -static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) +static inline int evmcs_vmread(u64 encoding, u64 *value) { switch (encoding) { case GUEST_RIP: @@ -672,7 +672,7 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) return 0; } -static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) +static inline int evmcs_vmwrite(u64 encoding, u64 value) { switch (encoding) { case GUEST_RIP: @@ -1226,9 +1226,9 @@ static inline int evmcs_vmlaunch(void) "pop %%rbp;" : [ret]"=&a"(ret) : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), + ((u64)¤t_evmcs->host_rsp), [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) + ((u64)¤t_evmcs->host_rip) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -1265,9 +1265,9 @@ static inline int evmcs_vmresume(void) "pop %%rbp;" : [ret]"=&a"(ret) : [host_rsp]"r" - ((uint64_t)¤t_evmcs->host_rsp), + ((u64)¤t_evmcs->host_rsp), [host_rip]"r" - ((uint64_t)¤t_evmcs->host_rip) + ((u64)¤t_evmcs->host_rip) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; diff --git a/tools/testing/selftests/kvm/include/x86/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h index f13e532be240..78003f5a22f3 100644 --- a/tools/testing/selftests/kvm/include/x86/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86/hyperv.h @@ -254,12 +254,12 @@ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status' * is set to the hypercall status (if no exception occurred). */ -static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address, - uint64_t *hv_status) +static inline u8 __hyperv_hypercall(u64 control, gva_t input_address, + gva_t output_address, + u64 *hv_status) { - uint64_t error_code; - uint8_t vector; + u64 error_code; + u8 vector; /* Note both the hypercall and the "asm safe" clobber r9-r11. */ asm volatile("mov %[output_address], %%r8\n\t" @@ -274,11 +274,11 @@ static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address, } /* Issue a Hyper-V hypercall and assert that it succeeded. */ -static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address) +static inline void hyperv_hypercall(u64 control, gva_t input_address, + gva_t output_address) { - uint64_t hv_status; - uint8_t vector; + u64 hv_status; + u8 vector; vector = __hyperv_hypercall(control, input_address, output_address, &hv_status); @@ -327,27 +327,27 @@ struct hv_vp_assist_page { extern struct hv_vp_assist_page *current_vp_assist; -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist); +int enable_vp_assist(u64 vp_assist_pa, void *vp_assist); struct hyperv_test_pages { /* VP assist page */ void *vp_assist_hva; - uint64_t vp_assist_gpa; + u64 vp_assist_gpa; void *vp_assist; /* Partition assist page */ void *partition_assist_hva; - uint64_t partition_assist_gpa; + u64 partition_assist_gpa; void *partition_assist; /* Enlightened VMCS */ void *enlightened_vmcs_hva; - uint64_t enlightened_vmcs_gpa; + u64 enlightened_vmcs_gpa; void *enlightened_vmcs; }; struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva); + gva_t *p_hv_pages_gva); /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h index 972bb1c4ab4c..c33ab6e04171 100644 --- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h @@ -10,13 +10,35 @@ extern bool is_forced_emulation_enabled; +struct pte_masks { + u64 present; + u64 writable; + u64 user; + u64 readable; + u64 executable; + u64 accessed; + u64 dirty; + u64 huge; + u64 nx; + u64 c; + u64 s; + + u64 always_set; +}; + +struct kvm_mmu_arch { + struct pte_masks pte_masks; +}; + +struct kvm_mmu; + struct kvm_vm_arch { - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; + gva_t gdt; + gva_t tss; + gva_t idt; - uint64_t c_bit; - uint64_t s_bit; + u64 c_bit; + u64 s_bit; int sev_fd; bool is_pt_protected; }; @@ -40,7 +62,7 @@ do { \ : "+m" (mem) \ : "r" (val) : "memory"); \ } else { \ - uint64_t __old = READ_ONCE(mem); \ + u64 __old = READ_ONCE(mem); \ \ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ : [ptr] "+m" (mem), [old] "+a" (__old) \ diff --git a/tools/testing/selftests/kvm/include/x86/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h index 3c10c4dc0ae8..98537cc8840d 100644 --- a/tools/testing/selftests/kvm/include/x86/pmu.h +++ b/tools/testing/selftests/kvm/include/x86/pmu.h @@ -5,7 +5,10 @@ #ifndef SELFTEST_KVM_PMU_H #define SELFTEST_KVM_PMU_H -#include <stdint.h> +#include <stdbool.h> + +#include <linux/types.h> +#include <linux/bits.h> #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300 @@ -61,6 +64,11 @@ #define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00) #define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00) #define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01) +#define INTEL_ARCH_TOPDOWN_BE_BOUND RAW_EVENT(0xa4, 0x02) +#define INTEL_ARCH_TOPDOWN_BAD_SPEC RAW_EVENT(0x73, 0x00) +#define INTEL_ARCH_TOPDOWN_FE_BOUND RAW_EVENT(0x9c, 0x01) +#define INTEL_ARCH_TOPDOWN_RETIRING RAW_EVENT(0xc2, 0x02) +#define INTEL_ARCH_LBR_INSERTS RAW_EVENT(0xe4, 0x01) #define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00) #define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00) @@ -80,6 +88,11 @@ enum intel_pmu_architectural_events { INTEL_ARCH_BRANCHES_RETIRED_INDEX, INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX, INTEL_ARCH_TOPDOWN_SLOTS_INDEX, + INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX, + INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX, + INTEL_ARCH_TOPDOWN_RETIRING_INDEX, + INTEL_ARCH_LBR_INSERTS_INDEX, NR_INTEL_ARCH_EVENTS, }; @@ -91,7 +104,21 @@ enum amd_pmu_zen_events { NR_AMD_ZEN_EVENTS, }; -extern const uint64_t intel_pmu_arch_events[]; -extern const uint64_t amd_pmu_zen_events[]; +extern const u64 intel_pmu_arch_events[]; +extern const u64 amd_pmu_zen_events[]; + +enum pmu_errata { + INSTRUCTIONS_RETIRED_OVERCOUNT, + BRANCHES_RETIRED_OVERCOUNT, +}; + +extern u64 pmu_errata_mask; + +void kvm_init_pmu_errata(void); + +static inline bool this_pmu_has_errata(enum pmu_errata errata) +{ + return pmu_errata_mask & BIT_ULL(errata); +} #endif /* SELFTEST_KVM_PMU_H */ diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index d60da8966772..77f576ee7789 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -21,7 +21,9 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; -extern uint64_t guest_tsc_khz; +extern bool host_cpu_is_hygon; +extern bool host_cpu_is_amd_compatible; +extern u64 guest_tsc_khz; #ifndef MAX_NR_CPUID_ENTRIES #define MAX_NR_CPUID_ENTRIES 100 @@ -34,6 +36,8 @@ extern uint64_t guest_tsc_khz; #define NMI_VECTOR 0x02 +const char *ex_str(int vector); + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -183,6 +187,9 @@ struct kvm_x86_cpu_feature { * Extended Leafs, a.k.a. AMD defined */ #define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23) +#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24) +#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28) #define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) #define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) @@ -196,9 +203,14 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) #define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) #define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) +#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15) #define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30) #define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) #define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) +#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4) +#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0) +#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2) /* * KVM defined paravirt features. @@ -258,7 +270,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) #define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) -#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) #define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) #define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) @@ -285,6 +297,8 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) +#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3) +#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15) #define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31) @@ -323,6 +337,11 @@ struct kvm_x86_pmu_feature { #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5) #define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6) #define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7) +#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8) +#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9) +#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10) +#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11) +#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12) #define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0) #define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1) @@ -346,16 +365,6 @@ static inline unsigned int x86_model(unsigned int eax) return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); } -/* Page table bitfield declarations */ -#define PTE_PRESENT_MASK BIT_ULL(0) -#define PTE_WRITABLE_MASK BIT_ULL(1) -#define PTE_USER_MASK BIT_ULL(2) -#define PTE_ACCESSED_MASK BIT_ULL(5) -#define PTE_DIRTY_MASK BIT_ULL(6) -#define PTE_LARGE_MASK BIT_ULL(7) -#define PTE_GLOBAL_MASK BIT_ULL(8) -#define PTE_NX_MASK BIT_ULL(63) - #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) #define PAGE_SHIFT 12 @@ -390,17 +399,17 @@ struct gpr64_regs { }; struct desc64 { - uint16_t limit0; - uint16_t base0; + u16 limit0; + u16 base0; unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; - uint32_t base3; - uint32_t zero1; + u32 base3; + u32 zero1; } __attribute__((packed)); struct desc_ptr { - uint16_t size; - uint64_t address; + u16 size; + u64 address; } __attribute__((packed)); struct kvm_x86_state { @@ -418,16 +427,18 @@ struct kvm_x86_state { struct kvm_msrs msrs; }; -static inline uint64_t get_desc64_base(const struct desc64 *desc) +static inline u64 get_desc64_base(const struct desc64 *desc) { - return ((uint64_t)desc->base3 << 32) | - (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); + return (u64)desc->base3 << 32 | + (u64)desc->base2 << 24 | + (u64)desc->base1 << 16 | + (u64)desc->base0; } -static inline uint64_t rdtsc(void) +static inline u64 rdtsc(void) { - uint32_t eax, edx; - uint64_t tsc_val; + u32 eax, edx; + u64 tsc_val; /* * The lfence is to wait (on Intel CPUs) until all previous * instructions have been executed. If software requires RDTSC to be @@ -435,39 +446,39 @@ static inline uint64_t rdtsc(void) * execute LFENCE immediately after RDTSC */ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx)); - tsc_val = ((uint64_t)edx) << 32 | eax; + tsc_val = ((u64)edx) << 32 | eax; return tsc_val; } -static inline uint64_t rdtscp(uint32_t *aux) +static inline u64 rdtscp(u32 *aux) { - uint32_t eax, edx; + u32 eax, edx; __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); - return ((uint64_t)edx) << 32 | eax; + return ((u64)edx) << 32 | eax; } -static inline uint64_t rdmsr(uint32_t msr) +static inline u64 rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } -static inline void wrmsr(uint32_t msr, uint64_t value) +static inline void wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); } -static inline uint16_t inw(uint16_t port) +static inline u16 inw(u16 port) { - uint16_t tmp; + u16 tmp; __asm__ __volatile__("in %%dx, %%ax" : /* output */ "=a" (tmp) @@ -476,101 +487,124 @@ static inline uint16_t inw(uint16_t port) return tmp; } -static inline uint16_t get_es(void) +static inline u16 get_es(void) { - uint16_t es; + u16 es; __asm__ __volatile__("mov %%es, %[es]" : /* output */ [es]"=rm"(es)); return es; } -static inline uint16_t get_cs(void) +static inline u16 get_cs(void) { - uint16_t cs; + u16 cs; __asm__ __volatile__("mov %%cs, %[cs]" : /* output */ [cs]"=rm"(cs)); return cs; } -static inline uint16_t get_ss(void) +static inline u16 get_ss(void) { - uint16_t ss; + u16 ss; __asm__ __volatile__("mov %%ss, %[ss]" : /* output */ [ss]"=rm"(ss)); return ss; } -static inline uint16_t get_ds(void) +static inline u16 get_ds(void) { - uint16_t ds; + u16 ds; __asm__ __volatile__("mov %%ds, %[ds]" : /* output */ [ds]"=rm"(ds)); return ds; } -static inline uint16_t get_fs(void) +static inline u16 get_fs(void) { - uint16_t fs; + u16 fs; __asm__ __volatile__("mov %%fs, %[fs]" : /* output */ [fs]"=rm"(fs)); return fs; } -static inline uint16_t get_gs(void) +static inline u16 get_gs(void) { - uint16_t gs; + u16 gs; __asm__ __volatile__("mov %%gs, %[gs]" : /* output */ [gs]"=rm"(gs)); return gs; } -static inline uint16_t get_tr(void) +static inline u16 get_tr(void) { - uint16_t tr; + u16 tr; __asm__ __volatile__("str %[tr]" : /* output */ [tr]"=rm"(tr)); return tr; } -static inline uint64_t get_cr0(void) +static inline u64 get_cr0(void) { - uint64_t cr0; + u64 cr0; __asm__ __volatile__("mov %%cr0, %[cr0]" : /* output */ [cr0]"=r"(cr0)); return cr0; } -static inline uint64_t get_cr3(void) +static inline void set_cr0(u64 val) { - uint64_t cr3; + __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory"); +} + +static inline u64 get_cr3(void) +{ + u64 cr3; __asm__ __volatile__("mov %%cr3, %[cr3]" : /* output */ [cr3]"=r"(cr3)); return cr3; } -static inline uint64_t get_cr4(void) +static inline void set_cr3(u64 val) +{ + __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory"); +} + +static inline u64 get_cr4(void) { - uint64_t cr4; + u64 cr4; __asm__ __volatile__("mov %%cr4, %[cr4]" : /* output */ [cr4]"=r"(cr4)); return cr4; } -static inline void set_cr4(uint64_t val) +static inline void set_cr4(u64 val) { __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } +static inline u64 get_cr8(void) +{ + u64 cr8; + + __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8)); + return cr8; +} + +static inline void set_cr8(u64 val) +{ + __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory"); +} + static inline void set_idt(const struct desc_ptr *idt_desc) { __asm__ __volatile__("lidt %0"::"m"(*idt_desc)); @@ -617,14 +651,14 @@ static inline struct desc_ptr get_idt(void) return idt; } -static inline void outl(uint16_t port, uint32_t value) +static inline void outl(u16 port, u32 value) { __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); } -static inline void __cpuid(uint32_t function, uint32_t index, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static inline void __cpuid(u32 function, u32 index, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) { *eax = function; *ecx = index; @@ -638,35 +672,35 @@ static inline void __cpuid(uint32_t function, uint32_t index, : "memory"); } -static inline void cpuid(uint32_t function, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static inline void cpuid(u32 function, + u32 *eax, u32 *ebx, + u32 *ecx, u32 *edx) { return __cpuid(function, 0, eax, ebx, ecx, edx); } -static inline uint32_t this_cpu_fms(void) +static inline u32 this_cpu_fms(void) { - uint32_t eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx; cpuid(1, &eax, &ebx, &ecx, &edx); return eax; } -static inline uint32_t this_cpu_family(void) +static inline u32 this_cpu_family(void) { return x86_family(this_cpu_fms()); } -static inline uint32_t this_cpu_model(void) +static inline u32 this_cpu_model(void) { return x86_model(this_cpu_fms()); } static inline bool this_cpu_vendor_string_is(const char *vendor) { - const uint32_t *chunk = (const uint32_t *)vendor; - uint32_t eax, ebx, ecx, edx; + const u32 *chunk = (const u32 *)vendor; + u32 eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); @@ -685,10 +719,14 @@ static inline bool this_cpu_is_amd(void) return this_cpu_vendor_string_is("AuthenticAMD"); } -static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) +static inline bool this_cpu_is_hygon(void) { - uint32_t gprs[4]; + return this_cpu_vendor_string_is("HygonGenuine"); +} + +static inline u32 __this_cpu_has(u32 function, u32 index, u8 reg, u8 lo, u8 hi) +{ + u32 gprs[4]; __cpuid(function, index, &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], @@ -703,7 +741,7 @@ static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) feature.reg, feature.bit, feature.bit); } -static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) +static inline u32 this_cpu_property(struct kvm_x86_cpu_property property) { return __this_cpu_has(property.function, property.index, property.reg, property.lo_bit, property.hi_bit); @@ -711,7 +749,7 @@ static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property) static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) { - uint32_t max_leaf; + u32 max_leaf; switch (property.function & 0xc0000000) { case 0: @@ -731,7 +769,7 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits; + u32 nr_bits; if (feature.f.reg == KVM_CPUID_EBX) { nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); @@ -743,13 +781,13 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature) return nr_bits > feature.f.bit || this_cpu_has(feature.f); } -static __always_inline uint64_t this_cpu_supported_xcr0(void) +static __always_inline u64 this_cpu_supported_xcr0(void) { if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) return 0; return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); + ((u64)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -828,7 +866,7 @@ static inline void cpu_relax(void) static inline void udelay(unsigned long usec) { - uint64_t start, now, cycles; + u64 start, now, cycles; GUEST_ASSERT(guest_tsc_khz); cycles = guest_tsc_khz / 1000 * usec; @@ -859,8 +897,8 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state); const struct kvm_msr_list *kvm_get_msr_index_list(void); const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); -uint64_t kvm_get_feature_msr(uint64_t msr_index); +bool kvm_msr_is_in_save_restore_list(u32 msr_index); +u64 kvm_get_feature_msr(u64 msr_index); static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs) @@ -915,20 +953,20 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index); + u32 function, u32 index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -static inline uint32_t kvm_cpu_fms(void) +static inline u32 kvm_cpu_fms(void) { return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax; } -static inline uint32_t kvm_cpu_family(void) +static inline u32 kvm_cpu_family(void) { return x86_family(kvm_cpu_fms()); } -static inline uint32_t kvm_cpu_model(void) +static inline u32 kvm_cpu_model(void) { return x86_model(kvm_cpu_fms()); } @@ -941,17 +979,17 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); } -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property); +u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property); -static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property) +static inline u32 kvm_cpu_property(struct kvm_x86_cpu_property property) { return kvm_cpuid_property(kvm_get_supported_cpuid(), property); } static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) { - uint32_t max_leaf; + u32 max_leaf; switch (property.function & 0xc0000000) { case 0: @@ -971,7 +1009,7 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property) static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) { - uint32_t nr_bits; + u32 nr_bits; if (feature.f.reg == KVM_CPUID_EBX) { nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); @@ -983,13 +1021,13 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) return nr_bits > feature.f.bit || kvm_cpu_has(feature.f); } -static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +static __always_inline u64 kvm_cpu_supported_xcr0(void) { if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) return 0; return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | - ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); + ((u64)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } static inline size_t kvm_cpuid2_size(int nr_entries) @@ -1023,8 +1061,8 @@ static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) } static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function, - uint32_t index) + u32 function, + u32 index) { TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)"); @@ -1035,7 +1073,7 @@ static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *v } static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, - uint32_t function) + u32 function) { return __vcpu_get_cpuid_entry(vcpu, function, 0); } @@ -1065,10 +1103,10 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, - uint32_t value); -void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); + u32 value); +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, u8 maxphyaddr); -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function); static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature) @@ -1096,8 +1134,8 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); } -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); +u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index); +int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value); /* * Assert on an MSR access(es) and pretty print the MSR name when possible. @@ -1122,14 +1160,14 @@ do { \ * is changing, etc. This is NOT an exhaustive list! The intent is to filter * out MSRs that are not durable _and_ that a selftest wants to write. */ -static inline bool is_durable_msr(uint32_t msr) +static inline bool is_durable_msr(u32 msr) { return msr != MSR_IA32_TSC; } #define vcpu_set_msr(vcpu, msr, val) \ do { \ - uint64_t r, v = val; \ + u64 r, v = val; \ \ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ @@ -1141,36 +1179,41 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { - uint64_t rax, rcx, rdx, rbx; - uint64_t rbp, rsi, rdi; - uint64_t r8, r9, r10, r11; - uint64_t r12, r13, r14, r15; - uint64_t vector; - uint64_t error_code; - uint64_t rip; - uint64_t cs; - uint64_t rflags; + u64 rax, rcx, rdx, rbx; + u64 rbp, rsi, rdi; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 vector; + u64 error_code; + u64 rip; + u64 cs; + u64 rflags; }; struct idt_entry { - uint16_t offset0; - uint16_t selector; - uint16_t ist : 3; - uint16_t : 5; - uint16_t type : 4; - uint16_t : 1; - uint16_t dpl : 2; - uint16_t p : 1; - uint16_t offset1; - uint32_t offset2; uint32_t reserved; + u16 offset0; + u16 selector; + u16 ist : 3; + u16 : 5; + u16 type : 4; + u16 : 1; + u16 dpl : 2; + u16 p : 1; + u16 offset1; + u32 offset2; u32 reserved; }; void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); +/* + * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be + * used to signal "no expcetion". + */ +#define KVM_MAGIC_DE_VECTOR 0xff + /* If a toddler were to say "abracadabra". */ #define KVM_EXCEPTION_MAGIC 0xabacadabaULL @@ -1218,8 +1261,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe(insn, inputs...) \ ({ \ - uint64_t ign_error_code; \ - uint8_t vector; \ + u64 ign_error_code; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ @@ -1230,7 +1273,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_ec(insn, error_code, inputs...) \ ({ \ - uint8_t vector; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ @@ -1241,10 +1284,10 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_fep(insn, inputs...) \ ({ \ - uint64_t ign_error_code; \ - uint8_t vector; \ + u64 ign_error_code; \ + u8 vector; \ \ - asm volatile(KVM_ASM_SAFE(insn) \ + asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ : inputs \ : KVM_ASM_SAFE_CLOBBERS); \ @@ -1253,7 +1296,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, #define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \ ({ \ - uint8_t vector; \ + u8 vector; \ \ asm volatile(KVM_ASM_SAFE_FEP(insn) \ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \ @@ -1263,11 +1306,11 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, }) #define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \ -static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ +static inline u8 insn##_safe ##_fep(u32 idx, u64 *val) \ { \ - uint64_t error_code; \ - uint8_t vector; \ - uint32_t a, d; \ + u64 error_code; \ + u8 vector; \ + u32 a, d; \ \ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \ : "=a"(a), "=d"(d), \ @@ -1275,7 +1318,7 @@ static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \ : "c"(idx) \ : KVM_ASM_SAFE_CLOBBERS); \ \ - *val = (uint64_t)a | ((uint64_t)d << 32); \ + *val = (u64)a | ((u64)d << 32); \ return vector; \ } @@ -1291,12 +1334,12 @@ BUILD_READ_U64_SAFE_HELPERS(rdmsr) BUILD_READ_U64_SAFE_HELPERS(rdpmc) BUILD_READ_U64_SAFE_HELPERS(xgetbv) -static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) +static inline u8 wrmsr_safe(u32 msr, u64 val) { return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); } -static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) +static inline u8 xsetbv_safe(u32 index, u64 value) { u32 eax = value; u32 edx = value >> 32; @@ -1306,6 +1349,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + static inline bool kvm_is_pmu_enabled(void) { return get_kvm_param_bool("enable_pmu"); @@ -1316,30 +1379,80 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level); -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + +static inline bool kvm_is_ignore_msrs(void) +{ + return get_kvm_param_bool("ignore_msrs"); +} + +static inline bool kvm_is_lbrv_enabled(void) +{ + return !!get_kvm_amd_param_integer("lbrv"); +} + +u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva); -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3); -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3); +u64 __xen_hypercall(u64 nr, u64 a0, void *a1); +void xen_hypercall(u64 nr, u64 a0, void *a1); -static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, - uint64_t size, uint64_t flags) +static inline u64 __kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags) { return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); } -static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, - uint64_t flags) +static inline void kvm_hypercall_map_gpa_range(gpa_t gpa, u64 size, u64 flags) { - uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + u64 ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); GUEST_ASSERT(!ret); } -void __vm_xsave_require_permission(uint64_t xfeature, const char *name); +/* + * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's + * intended to be a wake event arrives *after* HLT is executed. Modern CPUs, + * except for a few oddballs that KVM is unlikely to run on, block IRQs for one + * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may + * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too. + */ +static inline void safe_halt(void) +{ + asm volatile("sti; hlt"); +} + +/* + * Enable interrupts and ensure that interrupts are evaluated upon return from + * this function, i.e. execute a nop to consume the STi interrupt shadow. + */ +static inline void sti_nop(void) +{ + asm volatile ("sti; nop"); +} + +/* + * Enable interrupts for one instruction (nop), to allow the CPU to process all + * interrupts that are already pending. + */ +static inline void sti_nop_cli(void) +{ + asm volatile ("sti; nop; cli"); +} + +static inline void sti(void) +{ + asm volatile("sti"); +} + +static inline void cli(void) +{ + asm volatile ("cli"); +} + +void __vm_xsave_require_permission(u64 xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ __vm_xsave_require_permission(xfeature, #xfeature) @@ -1350,7 +1463,7 @@ enum pg_level { PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_512G, - PG_LEVEL_NUM + PG_LEVEL_256T }; #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) @@ -1360,9 +1473,51 @@ enum pg_level { #define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level); +#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present) +#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable) +#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user) +#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable) +#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable) +#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed) +#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty) +#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge) +#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx) +#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c) +#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s) +#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set) + +/* + * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present + * if it's executable or readable, as EPT supports execute-only PTEs, but not + * write-only PTEs. + */ +#define is_present_pte(mmu, pte) \ + (PTE_PRESENT_MASK(mmu) ? \ + !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \ + !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu)))) +#define is_executable_pte(mmu, pte) \ + ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu)) +#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu))) +#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu))) +#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu))) +#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu))) +#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu))) +#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte)) + +void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, + struct pte_masks *pte_masks); + +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, + gpa_t gpa, int level); +void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa, + u64 nr_bytes, int level); + +void vm_enable_tdp(struct kvm_vm *vm); +bool kvm_cpu_has_tdp(void); +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size); +void tdp_identity_map_default_memslots(struct kvm_vm *vm); +void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size); +u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa); /* * Basic CPU control in CR0 diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h index 82c11c81a956..1af44c151d60 100644 --- a/tools/testing/selftests/kvm/include/x86/sev.h +++ b/tools/testing/selftests/kvm/include/x86/sev.h @@ -25,19 +25,51 @@ enum sev_guest_state { #define SEV_POLICY_NO_DBG (1UL << 0) #define SEV_POLICY_ES (1UL << 2) +#define SNP_POLICY_SMT (1ULL << 16) +#define SNP_POLICY_RSVD_MBO (1ULL << 17) +#define SNP_POLICY_DBG (1ULL << 19) + #define GHCB_MSR_TERM_REQ 0x100 -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); +static inline bool is_sev_snp_vm(struct kvm_vm *vm) +{ + return vm->type == KVM_X86_SNP_VM; +} + +static inline bool is_sev_es_vm(struct kvm_vm *vm) +{ + return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM; +} + +static inline bool is_sev_vm(struct kvm_vm *vm) +{ + return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM; +} + +void sev_vm_launch(struct kvm_vm *vm, u32 policy); +void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); +void snp_vm_launch_start(struct kvm_vm *vm, u64 policy); +void snp_vm_launch_update(struct kvm_vm *vm); +void snp_vm_launch_finish(struct kvm_vm *vm); -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu); -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); +void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); /* + * A SEV-SNP VM requires the policy reserved bit to always be set. + * The SMT policy bit is also required to be set based on SMT being + * available and active on the system. + */ +static inline u64 snp_default_policy(void) +{ + return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0); +} + +/* * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long" * instead of a proper struct. The size of the parameter is embedded in the * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by @@ -53,7 +85,7 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); unsigned long raw; \ } sev_cmd = { .c = { \ .id = (cmd), \ - .data = (uint64_t)(arg), \ + .data = (u64)(arg), \ .sev_fd = (vm)->arch.sev_fd, \ } }; \ \ @@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); void sev_vm_init(struct kvm_vm *vm); void sev_es_vm_init(struct kvm_vm *vm); +void snp_vm_init(struct kvm_vm *vm); + +static inline void vmgexit(void) +{ + __asm__ __volatile__("rep; vmmcall"); +} static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) @@ -82,8 +120,8 @@ static inline void sev_register_encrypted_memory(struct kvm_vm *vm, vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range); } -static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, - uint64_t size) +static inline void sev_launch_update_data(struct kvm_vm *vm, gpa_t gpa, + u64 size) { struct kvm_sev_launch_update_data update_data = { .uaddr = (unsigned long)addr_gpa2hva(vm, gpa), @@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa, vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data); } +static inline void snp_launch_update_data(struct kvm_vm *vm, gpa_t gpa, + u64 hva, u64 size, u8 type) +{ + struct kvm_sev_snp_launch_update update_data = { + .uaddr = hva, + .gfn_start = gpa >> PAGE_SHIFT, + .len = size, + .type = type, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data); +} + #endif /* SELFTEST_KVM_SEV_H */ diff --git a/tools/testing/selftests/kvm/include/x86/smm.h b/tools/testing/selftests/kvm/include/x86/smm.h new file mode 100644 index 000000000000..2d1afa09819b --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86/smm.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef SELFTEST_KVM_SMM_H +#define SELFTEST_KVM_SMM_H + +#include "kvm_util.h" + +#define SMRAM_SIZE 65536 +#define SMRAM_MEMSLOT ((1 << 16) | 1) +#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa, + const void *smi_handler, size_t handler_size); + +void inject_smi(struct kvm_vcpu *vcpu); + +#endif /* SELFTEST_KVM_SMM_H */ diff --git a/tools/testing/selftests/kvm/include/x86/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h index 29cffd0a9181..c8539166270e 100644 --- a/tools/testing/selftests/kvm/include/x86/svm.h +++ b/tools/testing/selftests/kvm/include/x86/svm.h @@ -92,19 +92,18 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 int_vector; u32 int_state; u8 reserved_3[4]; - u32 exit_code; - u32 exit_code_hi; + u64 exit_code; u64 exit_info_1; u64 exit_info_2; u32 exit_int_info; u32 exit_int_info_err; - u64 nested_ctl; + u64 misc_ctl; u64 avic_vapic_bar; u8 reserved_4[8]; u32 event_inj; u32 event_inj_err; u64 nested_cr3; - u64 virt_ext; + u64 misc_ctl2; u32 clean; u32 reserved_5; u64 next_rip; @@ -156,9 +155,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define AVIC_ENABLE_SHIFT 31 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) -#define LBR_CTL_ENABLE_MASK BIT_ULL(0) -#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) - #define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_IOIO_STR_SHIFT 2 @@ -176,8 +172,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL #define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL -#define SVM_NESTED_CTL_NP_ENABLE BIT(0) -#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) +#define SVM_MISC_ENABLE_NP BIT(0) +#define SVM_MISC_ENABLE_SEV BIT(1) + +#define SVM_MISC2_ENABLE_V_LBR BIT_ULL(0) +#define SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE BIT_ULL(1) struct __attribute__ ((__packed__)) vmcb_seg { u16 selector; diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index b74c6dcddcbd..6c013eb838be 100644 --- a/tools/testing/selftests/kvm/include/x86/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -16,17 +16,20 @@ struct svm_test_data { /* VMCB */ struct vmcb *vmcb; /* gva */ void *vmcb_hva; - uint64_t vmcb_gpa; + u64 vmcb_gpa; /* host state-save area */ struct vmcb_save_area *save_area; /* gva */ void *save_area_hva; - uint64_t save_area_gpa; + u64 save_area_gpa; /* MSR-Bitmap */ void *msr; /* gva */ void *msr_hva; - uint64_t msr_gpa; + u64 msr_gpa; + + /* NPT */ + u64 ncr3_gpa; }; static inline void vmmcall(void) @@ -53,9 +56,15 @@ static inline void vmmcall(void) "clgi\n" \ ) -struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); +struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); +void run_guest(struct vmcb *vmcb, u64 vmcb_gpa); + +static inline bool kvm_cpu_has_npt(void) +{ + return kvm_cpu_has(X86_FEATURE_NPT); +} +void vm_enable_npt(struct kvm_vm *vm); int open_sev_dev_path_or_exit(void); diff --git a/tools/testing/selftests/kvm/include/x86/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h index d3825dcc3cd9..0e4950041e3e 100644 --- a/tools/testing/selftests/kvm/include/x86/ucall.h +++ b/tools/testing/selftests/kvm/include/x86/ucall.h @@ -6,7 +6,7 @@ #define UCALL_EXIT_REASON KVM_EXIT_IO -static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +static inline void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { } diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index edb3c391b982..90fffaf91595 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -285,16 +285,16 @@ enum vmcs_field { }; struct vmx_msr_entry { - uint32_t index; - uint32_t reserved; - uint64_t value; + u32 index; + u32 reserved; + u64 value; } __attribute__ ((aligned(16))); #include "evmcs.h" -static inline int vmxon(uint64_t phys) +static inline int vmxon(u64 phys) { - uint8_t ret; + u8 ret; __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" : [ret]"=rm"(ret) @@ -309,9 +309,9 @@ static inline void vmxoff(void) __asm__ __volatile__("vmxoff"); } -static inline int vmclear(uint64_t vmcs_pa) +static inline int vmclear(u64 vmcs_pa) { - uint8_t ret; + u8 ret; __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" : [ret]"=rm"(ret) @@ -321,9 +321,9 @@ static inline int vmclear(uint64_t vmcs_pa) return ret; } -static inline int vmptrld(uint64_t vmcs_pa) +static inline int vmptrld(u64 vmcs_pa) { - uint8_t ret; + u8 ret; if (enable_evmcs) return -1; @@ -336,10 +336,10 @@ static inline int vmptrld(uint64_t vmcs_pa) return ret; } -static inline int vmptrst(uint64_t *value) +static inline int vmptrst(u64 *value) { - uint64_t tmp; - uint8_t ret; + u64 tmp; + u8 ret; if (enable_evmcs) return evmcs_vmptrst(value); @@ -356,9 +356,9 @@ static inline int vmptrst(uint64_t *value) * A wrapper around vmptrst that ignores errors and returns zero if the * vmptrst instruction fails. */ -static inline uint64_t vmptrstz(void) +static inline u64 vmptrstz(void) { - uint64_t value = 0; + u64 value = 0; vmptrst(&value); return value; } @@ -391,8 +391,8 @@ static inline int vmlaunch(void) "pop %%rcx;" "pop %%rbp;" : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) + : [host_rsp]"r"((u64)HOST_RSP), + [host_rip]"r"((u64)HOST_RIP) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -426,8 +426,8 @@ static inline int vmresume(void) "pop %%rcx;" "pop %%rbp;" : [ret]"=&a"(ret) - : [host_rsp]"r"((uint64_t)HOST_RSP), - [host_rip]"r"((uint64_t)HOST_RIP) + : [host_rsp]"r"((u64)HOST_RSP), + [host_rip]"r"((u64)HOST_RIP) : "memory", "cc", "rbx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); return ret; @@ -447,10 +447,10 @@ static inline void vmcall(void) "r10", "r11", "r12", "r13", "r14", "r15"); } -static inline int vmread(uint64_t encoding, uint64_t *value) +static inline int vmread(u64 encoding, u64 *value) { - uint64_t tmp; - uint8_t ret; + u64 tmp; + u8 ret; if (enable_evmcs) return evmcs_vmread(encoding, value); @@ -468,16 +468,16 @@ static inline int vmread(uint64_t encoding, uint64_t *value) * A wrapper around vmread that ignores errors and returns zero if the * vmread instruction fails. */ -static inline uint64_t vmreadz(uint64_t encoding) +static inline u64 vmreadz(u64 encoding) { - uint64_t value = 0; + u64 value = 0; vmread(encoding, &value); return value; } -static inline int vmwrite(uint64_t encoding, uint64_t value) +static inline int vmwrite(u64 encoding, u64 value) { - uint8_t ret; + u8 ret; if (enable_evmcs) return evmcs_vmwrite(encoding, value); @@ -490,43 +490,41 @@ static inline int vmwrite(uint64_t encoding, uint64_t value) return ret; } -static inline uint32_t vmcs_revision(void) +static inline u32 vmcs_revision(void) { return rdmsr(MSR_IA32_VMX_BASIC); } struct vmx_pages { void *vmxon_hva; - uint64_t vmxon_gpa; + u64 vmxon_gpa; void *vmxon; void *vmcs_hva; - uint64_t vmcs_gpa; + u64 vmcs_gpa; void *vmcs; void *msr_hva; - uint64_t msr_gpa; + u64 msr_gpa; void *msr; void *shadow_vmcs_hva; - uint64_t shadow_vmcs_gpa; + u64 shadow_vmcs_gpa; void *shadow_vmcs; void *vmread_hva; - uint64_t vmread_gpa; + u64 vmread_gpa; void *vmread; void *vmwrite_hva; - uint64_t vmwrite_gpa; + u64 vmwrite_gpa; void *vmwrite; - void *eptp_hva; - uint64_t eptp_gpa; - void *eptp; - void *apic_access_hva; - uint64_t apic_access_gpa; + u64 apic_access_gpa; void *apic_access; + + u64 eptp_gpa; }; union vmx_basic { @@ -552,24 +550,15 @@ union vmx_ctrl_msr { }; }; -struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); +struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva); bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); bool ept_1g_pages_supported(void); -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr); -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot); -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); +void vm_enable_ept(struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..5d7590d01868 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the safe fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. + */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. */ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + struct kvm_vcpu *unused; + int r, i; + + TEST_REQUIRE(kvm_arch_has_default_irqchip()); + + /* + * Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. Also + * create an unused vCPU as certain architectures (like arm64) need to + * complete IRQ chip initialization after all possible vCPUs for a VM + * have been created. + */ + vm1 = vm_create_with_one_vcpu(&unused, NULL); + vm2 = vm_create_with_one_vcpu(&unused, NULL); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. + */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index f02355c3c4c2..b7dbde9c0843 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -239,14 +239,14 @@ int main(int argc, char *argv[]) * single stats file works and doesn't cause explosions. */ vm_stats_fds = vm_get_stats_fd(vms[i]); - stats_test(dup(vm_stats_fds)); + stats_test(kvm_dup(vm_stats_fds)); /* Verify userspace can instantiate multiple stats files. */ stats_test(vm_get_stats_fd(vms[i])); for (j = 0; j < max_vcpu; ++j) { vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]); - stats_test(dup(vcpu_stats_fds[j])); + stats_test(kvm_dup(vcpu_stats_fds[j])); stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j])); } diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index c78f34699f73..c5310736ed06 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -10,7 +10,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/resource.h> #include "test_util.h" @@ -39,36 +38,11 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - /* - * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + - * an arbitrary number for everything else. - */ - int nr_fds_wanted = kvm_max_vcpus + 100; - struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); - /* - * Check that we're allowed to open nr_fds_wanted file descriptors and - * try raising the limits if needed. - */ - TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); - - if (rl.rlim_cur < nr_fds_wanted) { - rl.rlim_cur = nr_fds_wanted; - if (rl.rlim_max < nr_fds_wanted) { - int old_rlim_max = rl.rlim_max; - rl.rlim_max = nr_fds_wanted; - - int r = setrlimit(RLIMIT_NOFILE, &rl); - __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", - old_rlim_max, nr_fds_wanted); - } else { - TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); - } - } + kvm_set_files_rlimit(kvm_max_vcpus); /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index dd8b12f626d3..fc5242fb956f 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -46,12 +46,12 @@ static const char * const test_stage_string[] = { struct test_args { struct kvm_vm *vm; - uint64_t guest_test_virt_mem; - uint64_t host_page_size; - uint64_t host_num_pages; - uint64_t large_page_size; - uint64_t large_num_pages; - uint64_t host_pages_per_lpage; + u64 guest_test_virt_mem; + u64 host_page_size; + u64 host_num_pages; + u64 large_page_size; + u64 large_num_pages; + u64 host_pages_per_lpage; enum vm_mem_backing_src_type src_type; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; }; @@ -63,7 +63,7 @@ struct test_args { static enum test_stage guest_test_stage; /* Host variables */ -static uint32_t nr_vcpus = 1; +static u32 nr_vcpus = 1; static struct test_args test_args; static enum test_stage *current_stage; static bool host_quit; @@ -77,19 +77,19 @@ static sem_t test_stage_completed; * This will be set to the topmost valid physical address minus * the test memory size. */ -static uint64_t guest_test_phys_mem; +static u64 guest_test_phys_mem; /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; static void guest_code(bool do_write) { struct test_args *p = &test_args; enum test_stage *current_stage = &guest_test_stage; - uint64_t addr; + u64 addr; int i, j; while (true) { @@ -113,9 +113,9 @@ static void guest_code(bool do_write) case KVM_CREATE_MAPPINGS: for (i = 0; i < p->large_num_pages; i++) { if (do_write) - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; else - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->large_page_size; } @@ -131,7 +131,7 @@ static void guest_code(bool do_write) case KVM_UPDATE_MAPPINGS: if (p->src_type == VM_MEM_SRC_ANONYMOUS) { for (i = 0; i < p->host_num_pages; i++) { - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; addr += p->host_page_size; } break; @@ -142,7 +142,7 @@ static void guest_code(bool do_write) * Write to the first host page in each large * page region, and triger break of large pages. */ - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; /* * Access the middle host pages in each large @@ -152,7 +152,7 @@ static void guest_code(bool do_write) */ addr += p->large_page_size / 2; for (j = 0; j < p->host_pages_per_lpage / 2; j++) { - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->host_page_size; } } @@ -167,7 +167,7 @@ static void guest_code(bool do_write) */ case KVM_ADJUST_MAPPINGS: for (i = 0; i < p->host_num_pages; i++) { - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); addr += p->host_page_size; } break; @@ -227,8 +227,8 @@ static void *vcpu_worker(void *data) } struct test_params { - uint64_t phys_offset; - uint64_t test_mem_size; + u64 phys_offset; + u64 test_mem_size; enum vm_mem_backing_src_type src_type; }; @@ -237,12 +237,12 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) int ret; struct test_params *p = arg; enum vm_mem_backing_src_type src_type = p->src_type; - uint64_t large_page_size = get_backing_src_pagesz(src_type); - uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; - uint64_t host_page_size = getpagesize(); - uint64_t test_mem_size = p->test_mem_size; - uint64_t guest_num_pages; - uint64_t alignment; + u64 large_page_size = get_backing_src_pagesz(src_type); + u64 guest_page_size = vm_guest_mode_params[mode].page_size; + u64 host_page_size = getpagesize(); + u64 test_mem_size = p->test_mem_size; + u64 guest_num_pages; + u64 alignment; void *host_test_mem; struct kvm_vm *vm; @@ -261,9 +261,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) guest_page_size; else guest_test_phys_mem = p->phys_offset; -#ifdef __s390x__ - alignment = max(0x100000UL, alignment); -#endif guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); /* Set up the shared data structure test_args */ @@ -284,7 +281,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + host_test_mem = addr_gpa2hva(vm, (gpa_t)guest_test_phys_mem); /* Export shared structure test_args to guest */ sync_global_to_guest(vm, test_args); @@ -295,7 +292,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) ret = sem_init(&test_stage_completed, 0, 0); TEST_ASSERT(ret == 0, "Error in sem_init"); - current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage)); + current_stage = addr_gva2hva(vm, (gva_t)(&guest_test_stage)); *current_stage = NUM_TEST_STAGES; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); @@ -307,7 +304,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) pr_info("Guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); pr_info("Host virtual test memory offset: 0x%lx\n", - (uint64_t)host_test_mem); + (u64)host_test_mem); pr_info("Number of testing vCPUs: %d\n", nr_vcpus); return vm; diff --git a/tools/testing/selftests/kvm/lib/arm64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c index 7abbf8866512..011dfe1dfcb3 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic.c @@ -50,7 +50,7 @@ static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) void gic_init(enum gic_type type, unsigned int nr_cpus) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); GUEST_ASSERT(nr_cpus); @@ -73,7 +73,7 @@ void gic_irq_disable(unsigned int intid) unsigned int gic_get_and_ack_irq(void) { - uint64_t irqstat; + u64 irqstat; unsigned int intid; GUEST_ASSERT(gic_common_ops); @@ -102,7 +102,7 @@ void gic_set_eoi_split(bool split) gic_common_ops->gic_set_eoi_split(split); } -void gic_set_priority_mask(uint64_t pmr) +void gic_set_priority_mask(u64 pmr) { GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_set_priority_mask(pmr); @@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_irq_set_config(intid, is_edge); } + +void gic_irq_set_group(unsigned int intid, bool group) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_group(intid, group); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h index d24e9ecc96c6..6d393f5c5685 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h @@ -12,19 +12,20 @@ struct gic_common_ops { void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); - uint64_t (*gic_read_iar)(void); - void (*gic_write_eoir)(uint32_t irq); - void (*gic_write_dir)(uint32_t irq); + u64 (*gic_read_iar)(void); + void (*gic_write_eoir)(u32 irq); + void (*gic_write_dir)(u32 irq); void (*gic_set_eoi_split)(bool split); - void (*gic_set_priority_mask)(uint64_t mask); - void (*gic_set_priority)(uint32_t intid, uint32_t prio); - void (*gic_irq_set_active)(uint32_t intid); - void (*gic_irq_clear_active)(uint32_t intid); - bool (*gic_irq_get_active)(uint32_t intid); - void (*gic_irq_set_pending)(uint32_t intid); - void (*gic_irq_clear_pending)(uint32_t intid); - bool (*gic_irq_get_pending)(uint32_t intid); - void (*gic_irq_set_config)(uint32_t intid, bool is_edge); + void (*gic_set_priority_mask)(u64 mask); + void (*gic_set_priority)(u32 intid, u32 prio); + void (*gic_irq_set_active)(u32 intid); + void (*gic_irq_clear_active)(u32 intid); + bool (*gic_irq_get_active)(u32 intid); + void (*gic_irq_set_pending)(u32 intid); + void (*gic_irq_clear_pending)(u32 intid); + bool (*gic_irq_get_pending)(u32 intid); + void (*gic_irq_set_config)(u32 intid, bool is_edge); + void (*gic_irq_set_group)(u32 intid, bool group); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c index 66d05506f78b..a99a53accfe9 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c @@ -50,13 +50,13 @@ static void gicv3_gicd_wait_for_rwp(void) } } -static inline volatile void *gicr_base_cpu(uint32_t cpu) +static inline volatile void *gicr_base_cpu(u32 cpu) { /* Align all the redistributors sequentially */ return GICR_BASE_GVA + cpu * SZ_64K * 2; } -static void gicv3_gicr_wait_for_rwp(uint32_t cpu) +static void gicv3_gicr_wait_for_rwp(u32 cpu) { unsigned int count = 100000; /* 1s */ @@ -66,7 +66,7 @@ static void gicv3_gicr_wait_for_rwp(uint32_t cpu) } } -static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +static void gicv3_wait_for_rwp(u32 cpu_or_dist) { if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); @@ -91,34 +91,34 @@ static enum gicv3_intid_range get_intid_range(unsigned int intid) return INVALID_RANGE; } -static uint64_t gicv3_read_iar(void) +static u64 gicv3_read_iar(void) { - uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); + u64 irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); dsb(sy); return irqstat; } -static void gicv3_write_eoir(uint32_t irq) +static void gicv3_write_eoir(u32 irq) { write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); isb(); } -static void gicv3_write_dir(uint32_t irq) +static void gicv3_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); isb(); } -static void gicv3_set_priority_mask(uint64_t mask) +static void gicv3_set_priority_mask(u64 mask) { write_sysreg_s(mask, SYS_ICC_PMR_EL1); } static void gicv3_set_eoi_split(bool split) { - uint32_t val; + u32 val; /* * All other fields are read-only, so no need to read CTLR first. In @@ -129,29 +129,29 @@ static void gicv3_set_eoi_split(bool split) isb(); } -uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +u32 gicv3_reg_readl(u32 cpu_or_dist, u64 offset) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } -void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +void gicv3_reg_writel(u32 cpu_or_dist, u64 offset, u32 reg_val) { volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } -uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +u32 gicv3_getl_fields(u32 cpu_or_dist, u64 offset, u32 mask) { return gicv3_reg_readl(cpu_or_dist, offset) & mask; } -void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, - uint32_t mask, uint32_t reg_val) +void gicv3_setl_fields(u32 cpu_or_dist, u64 offset, + u32 mask, u32 reg_val) { - uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + u32 tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; tmp |= (reg_val & mask); gicv3_reg_writel(cpu_or_dist, offset, tmp); @@ -165,14 +165,14 @@ void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being * marked as "Reserved" in the Distributor map. */ -static void gicv3_access_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, - bool write, uint32_t *val) +static void gicv3_access_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, + bool write, u32 *val) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); enum gicv3_intid_range intid_range = get_intid_range(intid); - uint32_t fields_per_reg, index, mask, shift; - uint32_t cpu_or_dist; + u32 fields_per_reg, index, mask, shift; + u32 cpu_or_dist; GUEST_ASSERT(bits_per_field <= reg_bits); GUEST_ASSERT(!write || *val < (1U << bits_per_field)); @@ -197,32 +197,32 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset, *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; } -static void gicv3_write_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +static void gicv3_write_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field, u32 val) { gicv3_access_reg(intid, offset, reg_bits, bits_per_field, true, &val); } -static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, - uint32_t reg_bits, uint32_t bits_per_field) +static u32 gicv3_read_reg(u32 intid, u64 offset, + u32 reg_bits, u32 bits_per_field) { - uint32_t val; + u32 val; gicv3_access_reg(intid, offset, reg_bits, bits_per_field, false, &val); return val; } -static void gicv3_set_priority(uint32_t intid, uint32_t prio) +static void gicv3_set_priority(u32 intid, u32 prio) { gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); } /* Sets the intid to be level-sensitive or edge-triggered. */ -static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +static void gicv3_irq_set_config(u32 intid, bool is_edge) { - uint32_t val; + u32 val; /* N/A for private interrupts. */ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); @@ -230,57 +230,57 @@ static void gicv3_irq_set_config(uint32_t intid, bool is_edge) gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); } -static void gicv3_irq_enable(uint32_t intid) +static void gicv3_irq_enable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_disable(uint32_t intid) +static void gicv3_irq_disable(u32 intid) { bool is_spi = get_intid_range(intid) == SPI_RANGE; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); } -static void gicv3_irq_set_active(uint32_t intid) +static void gicv3_irq_set_active(u32 intid) { gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); } -static void gicv3_irq_clear_active(uint32_t intid) +static void gicv3_irq_clear_active(u32 intid) { gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); } -static bool gicv3_irq_get_active(uint32_t intid) +static bool gicv3_irq_get_active(u32 intid) { return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); } -static void gicv3_irq_set_pending(uint32_t intid) +static void gicv3_irq_set_pending(u32 intid) { gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); } -static void gicv3_irq_clear_pending(uint32_t intid) +static void gicv3_irq_clear_pending(u32 intid) { gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); } -static bool gicv3_irq_get_pending(uint32_t intid) +static bool gicv3_irq_get_pending(u32 intid) { return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } static void gicv3_enable_redist(volatile void *redist_base) { - uint32_t val = readl(redist_base + GICR_WAKER); + u32 val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ val &= ~GICR_WAKER_ProcessorSleep; @@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base) } } +static void gicv3_set_group(u32 intid, bool grp) +{ + u32 cpu_or_dist; + u32 val; + + cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid(); + val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4); + if (grp) + val |= BIT(intid % 32); + else + val &= ~BIT(intid % 32); + gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val); +} + static void gicv3_cpu_init(unsigned int cpu) { volatile void *sgi_base; unsigned int i; volatile void *redist_base_cpu; + u64 typer; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); + /* Verify assumption that GICR_TYPER.Processor_number == cpu */ + typer = readq_relaxed(redist_base_cpu + GICR_TYPER); + GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu); + gicv3_enable_redist(redist_base_cpu); /* @@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu) /* Set a default priority threshold */ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); + /* Disable Group-0 interrupts */ + write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1); /* Enable non-secure Group-1 interrupts */ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } @@ -400,10 +421,11 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_clear_pending = gicv3_irq_clear_pending, .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, + .gic_irq_set_group = gicv3_set_group, }; -void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, - vm_paddr_t pend_table) +void gic_rdist_enable_lpis(gpa_t cfg_table, size_t cfg_table_size, + gpa_t pend_table) { volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..1188b578121d 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -52,7 +54,7 @@ static unsigned long its_find_baser(unsigned int type) return -1; } -static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +static void its_install_table(unsigned int type, gpa_t base, size_t size) { unsigned long offset = its_find_baser(type); u64 baser; @@ -67,7 +69,7 @@ static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) its_write_u64(offset, baser); } -static void its_install_cmdq(vm_paddr_t base, size_t size) +static void its_install_cmdq(gpa_t base, size_t size) { u64 cbaser; @@ -80,9 +82,8 @@ static void its_install_cmdq(vm_paddr_t base, size_t size) its_write_u64(GITS_CBASER, cbaser); } -void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, - vm_paddr_t device_tbl, size_t device_tbl_sz, - vm_paddr_t cmdq, size_t cmdq_size) +void its_init(gpa_t coll_tbl, size_t coll_tbl_sz, gpa_t device_tbl, + size_t device_tbl_sz, gpa_t cmdq, size_t cmdq_size) { u32 ctlr; @@ -163,6 +164,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -197,7 +203,7 @@ static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) } } -void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, gpa_t itt_base, size_t itt_size, bool valid) { struct its_cmd_block cmd = {}; @@ -217,7 +223,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); @@ -246,3 +252,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id) its_send_cmd(cmdq_base, &cmd); } + +void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_SYNC); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f3..01325bf4d36f 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -12,52 +12,48 @@ #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" +#include "vgic.h" #include <linux/bitfield.h> #include <linux/sizes.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +static u64 pgd_index(struct kvm_vm *vm, gva_t gva) { - return (v + vm->page_size) & ~(vm->page_size - 1); -} - -static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) -{ - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + u64 mask = (1UL << (vm->va_bits - shift)) - 1; return (gva >> shift) & mask; } -static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pud_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels == 4, + TEST_ASSERT(vm->mmu.pgtable_levels == 4, "Mode %d does not have 4 page table levels", vm->mode); return (gva >> shift) & mask; } -static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pmd_index(struct kvm_vm *vm, gva_t gva) { unsigned int shift = (vm->page_shift - 3) + vm->page_shift; - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; - TEST_ASSERT(vm->pgtable_levels >= 3, + TEST_ASSERT(vm->mmu.pgtable_levels >= 3, "Mode %d does not have >= 3 page table levels", vm->mode); return (gva >> shift) & mask; } -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +static u64 pte_index(struct kvm_vm *vm, gva_t gva) { - uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + u64 mask = (1UL << (vm->page_shift - 3)) - 1; return (gva >> vm->page_shift) & mask; } @@ -67,137 +63,150 @@ static inline bool use_lpa2_pte_format(struct kvm_vm *vm) (vm->pa_bits > 48 || vm->va_bits > 48); } -static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) +static u64 addr_pte(struct kvm_vm *vm, u64 pa, u64 attrs) { - uint64_t pte; + u64 pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; return pte; } -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) +static u64 pte_addr(struct kvm_vm *vm, u64 pte) { - uint64_t pa; + u64 pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; } -static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +static u64 ptrs_per_pgd(struct kvm_vm *vm) { - unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + unsigned int shift = (vm->mmu.pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; return 1 << (vm->va_bits - shift); } -static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) +static u64 __maybe_unused ptrs_per_pte(struct kvm_vm *vm) { return 1 << (vm->page_shift - 3); } void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } -static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t flags) +static void _virt_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, + u64 flags) { - uint8_t attr_idx = flags & 7; - uint64_t *ptep; + u8 attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + u64 pg_attr; + u64 *ptep; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: - ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; break; default: TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, gpa, pg_attr); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - uint64_t attr_idx = MT_NORMAL; + u64 attr_idx = MT_NORMAL; - _virt_pg_map(vm, vaddr, paddr, attr_idx); + _virt_pg_map(vm, gva, gpa, attr_idx); } -uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) +u64 *virt_get_pte_hva_at_level(struct kvm_vm *vm, gva_t gva, int level) { - uint64_t *ptep; + u64 *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pgd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 0) + return ptep; - switch (vm->pgtable_levels) { + switch (vm->mmu.pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 1) + break; /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; if (!ptep) goto unmapped_gva; + if (level == 2) + break; /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; @@ -215,18 +224,23 @@ unmapped_gva: exit(EXIT_FAILURE); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +u64 *virt_get_pte_hva(struct kvm_vm *vm, gva_t gva) +{ + return virt_get_pte_hva_at_level(vm, gva, 3); +} + +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep = virt_get_pte_hva(vm, gva); + u64 *ptep = virt_get_pte_hva(vm, gva); return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) { #ifdef DEBUG static const char * const type[] = { "", "pud", "pmd", "pte" }; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level == 4) return; @@ -241,15 +255,15 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - int level = 4 - (vm->pgtable_levels - 1); - uint64_t pgd, *ptep; + int level = 4 - (vm->mmu.pgtable_levels - 1); + u64 pgd, *ptep; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + for (pgd = vm->mmu.pgd; pgd < vm->mmu.pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -258,107 +272,133 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +bool vm_supports_el2(struct kvm_vm *vm) +{ + const char *value = getenv("NV"); + + if (value && *value == '0') + return false; + + return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic; +} + +void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init) +{ + struct kvm_vcpu_init preferred = {}; + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); + if (vm_supports_el2(vm)) + preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2); + + *init = preferred; +} + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; struct kvm_vm *vm = vcpu->vm; - uint64_t sctlr_el1, tcr_el1, ttbr0_el1; + u64 sctlr_el1, tcr_el1, ttbr0_el1; - if (!init) + if (!init) { + kvm_get_default_vcpu_target(vm, &default_init); init = &default_init; - - if (init->target == -1) { - struct kvm_vcpu_init preferred; - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred); - init->target = preferred.target; } vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); + vcpu->init = *init; /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20); - sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1)); - tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1)); + sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1)); + tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1)); /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift); + ttbr0_el1 = vm->mmu.pgd & GENMASK(47, vm->page_shift); /* Configure output size */ switch (vm->mode) { case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ - ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; + tcr_el1 |= TCR_IPS_52_BITS; + ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->mmu.pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); + tcr_el1 |= TCR_TBI1; + tcr_el1 |= TCR_EPD1_MASK; if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); + + if (!vcpu_has_el2(vcpu)) + return; + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), + HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { - uint64_t pstate, pc; + u64 pstate, pc; pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate)); pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)); @@ -369,29 +409,29 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) { - vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (u64)guest_code); } -static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init) { size_t stack_size; - uint64_t stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); aarch64_vcpu_setup(vcpu, init); - vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_gva + stack_size); return vcpu; } -struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, u32 vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init); @@ -401,7 +441,7 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { return __aarch64_vcpu_add(vm, vcpu_id, NULL); } @@ -418,13 +458,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) for (i = 0; i < num; i++) { vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), - va_arg(ap, uint64_t)); + va_arg(ap, u64)); } va_end(ap); } -void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) +void kvm_exit_unexpected_exception(int vector, u64 ec, bool valid_ec) { ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec); while (1) @@ -457,7 +497,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (u64)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -495,10 +535,10 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, @@ -522,13 +562,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, handlers->exception_handlers[vector][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return read_sysreg(tpidr_el1); } -static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, - uint32_t not_sup_val, uint32_t ipa52_min_val) +static u32 max_ipa_for_page_size(u32 vm_ipa, u32 gran, + u32 not_sup_val, u32 ipa52_min_val) { if (gran == not_sup_val) return 0; @@ -538,16 +578,16 @@ static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, return min(vm_ipa, 48U); } -void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, - uint32_t *ipa16k, uint32_t *ipa64k) +void aarch64_get_supported_page_sizes(u32 ipa, u32 *ipa4k, + u32 *ipa16k, u32 *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; - uint64_t val; - uint32_t gran; + u64 val; + u32 gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), - .addr = (uint64_t)&val, + .addr = (u64)&val, }; kvm_fd = open_kvm_dev_path_or_exit(); @@ -565,15 +605,15 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN4, val); *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, ID_AA64MMFR0_EL1_TGRAN4_52_BIT); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN64, val); *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, ID_AA64MMFR0_EL1_TGRAN64_IMP); - gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + gran = FIELD_GET(ID_AA64MMFR0_EL1_TGRAN16, val); *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, ID_AA64MMFR0_EL1_TGRAN16_52_BIT); @@ -605,17 +645,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7") -void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_hvc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); } -void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, - uint64_t arg6, struct arm_smccc_res *res) +void smccc_smc(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, struct arm_smccc_res *res) { __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5, arg6, res); @@ -630,7 +670,7 @@ void kvm_selftest_arch_init(void) guest_modes_append_default(); } -void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +void vm_populate_gva_bitmap(struct kvm_vm *vm) { /* * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space @@ -645,3 +685,44 @@ void wfi(void) { asm volatile("wfi"); } + +static bool request_mte; +static bool request_vgic = true; + +void test_wants_mte(void) +{ + request_mte = true; +} + +void test_disable_default_vgic(void) +{ + request_vgic = false; +} + +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ + if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE)) + vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0); + + if (request_vgic && kvm_supports_vgic_v3()) { + vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64); + vm->arch.has_gic = true; + } +} + +void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + __vgic_v3_init(vm->arch.gic_fd); +} + +void kvm_arch_vm_release(struct kvm_vm *vm) +{ + if (vm->arch.has_gic) + close(vm->arch.gic_fd); +} + +bool kvm_arch_has_default_irqchip(void) +{ + return request_vgic && kvm_supports_vgic_v3(); +} diff --git a/tools/testing/selftests/kvm/lib/arm64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c index ddab0ce89d4d..e0550ad5aa75 100644 --- a/tools/testing/selftests/kvm/lib/arm64/ucall.c +++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c @@ -6,17 +6,17 @@ */ #include "kvm_util.h" -vm_vaddr_t *ucall_exit_mmio_addr; +gva_t *ucall_exit_mmio_addr; -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) { - vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); virt_map(vm, mmio_gva, mmio_gpa, 1); vm->ucall_mmio_addr = mmio_gpa; - write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) @@ -25,9 +25,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { - TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t), + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), "Unexpected ucall exit mmio address access"); - return (void *)(*((uint64_t *)run->mmio.data)); + return (void *)(*((u64 *)run->mmio.data)); } return NULL; diff --git a/tools/testing/selftests/kvm/lib/arm64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c index 4427f43f73ea..4ecebf3146a2 100644 --- a/tools/testing/selftests/kvm/lib/arm64/vgic.c +++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c @@ -15,6 +15,17 @@ #include "gic.h" #include "gic_v3.h" +bool kvm_supports_vgic_v3(void) +{ + struct kvm_vm *vm = vm_create_barebones(); + int r; + + r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + kvm_vm_free(vm); + + return !r; +} + /* * vGIC-v3 default host setup * @@ -30,24 +41,11 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) +int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) { int gic_fd; - uint64_t attr; - struct list_head *iter; - unsigned int nr_gic_pages, nr_vcpus_created = 0; - - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); - - /* - * Make sure that the caller is infact calling this - * function after all the vCPUs are added. - */ - list_for_each(iter, &vm->vcpus) - nr_vcpus_created++; - TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", - nr_vcpus, nr_vcpus_created); + u64 attr; + unsigned int nr_gic_pages; /* Distributor setup */ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); @@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); @@ -73,18 +68,47 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); - kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + return gic_fd; +} + +void __vgic_v3_init(int fd) +{ + kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); +} - return gic_fd; +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, u32 nr_irqs) +{ + unsigned int nr_vcpus_created = 0; + struct list_head *iter; + int fd; + + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); + + /* + * Make sure that the caller is infact calling this + * function after all the vCPUs are added. + */ + list_for_each(iter, &vm->vcpus) + nr_vcpus_created++; + TEST_ASSERT(nr_vcpus == nr_vcpus_created, + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", + nr_vcpus, nr_vcpus_created); + + fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs); + if (fd < 0) + return fd; + + __vgic_v3_init(fd); + return fd; } /* should only work for level sensitive interrupts */ -int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +int _kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { - uint64_t attr = 32 * (intid / 32); - uint64_t index = intid % 32; - uint64_t val; + u64 attr = 32 * (intid / 32); + u64 index = intid % 32; + u64 val; int ret; ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, @@ -98,16 +122,16 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) return ret; } -void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +void kvm_irq_set_level_info(int gic_fd, u32 intid, int level) { int ret = _kvm_irq_set_level_info(gic_fd, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); } -int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +int _kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { - uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + u32 irq = intid & KVM_ARM_IRQ_NUM_MASK; TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " "doesn't allow injecting SGIs. There's no mask for it."); @@ -120,23 +144,23 @@ int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) return _kvm_irq_line(vm, irq, level); } -void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +void kvm_arm_irq_line(struct kvm_vm *vm, u32 intid, int level) { int ret = _kvm_arm_irq_line(vm, intid, level); TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); } -static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, - uint64_t reg_off) +static void vgic_poke_irq(int gic_fd, u32 intid, struct kvm_vcpu *vcpu, + u64 reg_off) { - uint64_t reg = intid / 32; - uint64_t index = intid % 32; - uint64_t attr = reg_off + reg * 4; - uint64_t val; + u64 reg = intid / 32; + u64 index = intid % 32; + u64 attr = reg_off + reg * 4; + u64 val; bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); - uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + u32 group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; if (intid_is_private) { @@ -159,12 +183,12 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, kvm_device_attr_set(gic_fd, group, attr, &val); } -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_ispendr(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); } -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) +void kvm_irq_write_isactiver(int gic_fd, u32 intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index b49690658c60..8be0d09ecf0f 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -6,11 +6,14 @@ */ #include "test_util.h" -#include <execinfo.h> + #include <sys/syscall.h> #include "kselftest.h" +#ifdef __GLIBC__ +#include <execinfo.h> + /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); static void test_dump_stack(void) @@ -57,6 +60,9 @@ static void test_dump_stack(void) system(cmd); #pragma GCC diagnostic pop } +#else +static void test_dump_stack(void) {} +#endif static pid_t _gettid(void) { diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index f34d926d9735..1924a9895834 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -7,7 +7,7 @@ #include "test_util.h" -#include <bits/endian.h> +#include <endian.h> #include <linux/elf.h> #include "kvm_util.h" @@ -156,21 +156,20 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, - n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); - vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; + n1, (u64)phdr.p_memsz); + gva_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); + gva_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, - MEM_REGION_CODE); - TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " + gva_t gva = __vm_alloc(vm, seg_size, seg_vstart, MEM_REGION_CODE); + TEST_ASSERT(gva == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" " seg_vstart: 0x%lx\n" - " vaddr: 0x%lx", - n1, seg_vstart, vaddr); - memset(addr_gva2hva(vm, vaddr), 0, seg_size); + " gva: 0x%lx", + n1, seg_vstart, gva); + memset(addr_gva2hva(vm, gva), 0, seg_size); /* TODO(lhuemill): Set permissions of each memory segment * based on the least-significant 3 bits of phdr.p_flags. */ diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index b04901e55138..7a96c43b5704 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -4,7 +4,7 @@ */ #include "guest_modes.h" -#ifdef __aarch64__ +#if defined(__aarch64__) || defined(__riscv) #include "processor.h" enum vm_guest_mode vm_mode_default; #endif @@ -13,12 +13,14 @@ struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { -#ifndef __aarch64__ +#if !defined(__aarch64__) && !defined(__riscv) guest_mode_append(VM_MODE_DEFAULT, true); -#else +#endif + +#ifdef __aarch64__ { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - uint32_t ipa4k, ipa16k, ipa64k; + u32 ipa4k, ipa16k, ipa64k; int i; aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); @@ -74,11 +76,36 @@ void guest_modes_append_default(void) #ifdef __riscv { unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); + unsigned long satp_mode = riscv64_get_satp_mode() << SATP_MODE_SHIFT; + int i; - if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true); - if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true); + switch (sz) { + case 59: + guest_mode_append(VM_MODE_P56V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P56V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P56V39_4K, satp_mode >= SATP_MODE_39); + break; + case 50: + guest_mode_append(VM_MODE_P50V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P50V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P50V39_4K, satp_mode >= SATP_MODE_39); + break; + case 41: + guest_mode_append(VM_MODE_P41V57_4K, satp_mode >= SATP_MODE_57); + guest_mode_append(VM_MODE_P41V48_4K, satp_mode >= SATP_MODE_48); + guest_mode_append(VM_MODE_P41V39_4K, satp_mode >= SATP_MODE_39); + break; + default: + break; + } + + /* set the first supported mode as default */ + vm_mode_default = NUM_VM_MODES; + for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) { + if (guest_modes[i].supported && guest_modes[i].enabled) + vm_mode_default = i; + } + TEST_ASSERT(vm_mode_default != NUM_VM_MODES, "No supported mode!"); } #endif } diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index 74627514c4d4..7a33965349a7 100644 --- a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -35,8 +35,8 @@ static int skip_atoi(const char **s) ({ \ int __res; \ \ - __res = ((uint64_t) n) % (uint32_t) base; \ - n = ((uint64_t) n) / (uint32_t) base; \ + __res = ((u64)n) % (u32)base; \ + n = ((u64)n) / (u32)base; \ __res; \ }) @@ -119,7 +119,7 @@ int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) { char *str, *end; const char *s; - uint64_t num; + u64 num; int i, base; int len; @@ -216,7 +216,7 @@ repeat: while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); APPEND_BUFFER_SAFE(str, end, - (uint8_t)va_arg(args, int)); + (u8)va_arg(args, int)); while (--field_width > 0) APPEND_BUFFER_SAFE(str, end, ' '); continue; @@ -240,7 +240,7 @@ repeat: flags |= SPECIAL | SMALL | ZEROPAD; } str = number(str, end, - (uint64_t)va_arg(args, void *), 16, + (u64)va_arg(args, void *), 16, field_width, precision, flags); continue; @@ -284,15 +284,15 @@ repeat: continue; } if (qualifier == 'l') - num = va_arg(args, uint64_t); + num = va_arg(args, u64); else if (qualifier == 'h') { - num = (uint16_t)va_arg(args, int); + num = (u16)va_arg(args, int); if (flags & SIGN) - num = (int16_t)num; + num = (s16)num; } else if (flags & SIGN) num = va_arg(args, int); else - num = va_arg(args, uint32_t); + num = va_arg(args, u32); str = number(str, end, num, base, field_width, precision, flags); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 33fefeb3ca44..e08967ef7b7b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -5,13 +5,14 @@ * Copyright (C) 2018, Google LLC. */ #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #include <assert.h> #include <sched.h> -#include <sys/mman.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -19,21 +20,33 @@ #define KVM_UTIL_MIN_PFN 2 -uint32_t guest_random_seed; +u32 guest_random_seed; struct guest_random_state guest_rng; -static uint32_t last_guest_seed; +static u32 last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" : enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -47,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -63,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -79,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -103,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -119,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * @@ -179,7 +165,7 @@ unsigned int kvm_check_cap(long cap) return (unsigned int)ret; } -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) +void vm_enable_dirty_ring(struct kvm_vm *vm, u32 ring_size) { if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL)) vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size); @@ -196,9 +182,14 @@ static void vm_open(struct kvm_vm *vm) vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vm->stats.fd = vm_get_stats_fd(vm); + else + vm->stats.fd = -1; } -const char *vm_guest_mode_string(uint32_t i) +const char *vm_guest_mode_string(u32 i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", @@ -210,13 +201,23 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", - [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_PXXVYY_4K] = "PA-bits:ANY, VA-bits:48 or 57, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages", [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", + [VM_MODE_P56V57_4K] = "PA-bits:56, VA-bits:57, 4K pages", + [VM_MODE_P56V48_4K] = "PA-bits:56, VA-bits:48, 4K pages", + [VM_MODE_P56V39_4K] = "PA-bits:56, VA-bits:39, 4K pages", + [VM_MODE_P50V57_4K] = "PA-bits:50, VA-bits:57, 4K pages", + [VM_MODE_P50V48_4K] = "PA-bits:50, VA-bits:48, 4K pages", + [VM_MODE_P50V39_4K] = "PA-bits:50, VA-bits:39, 4K pages", + [VM_MODE_P41V57_4K] = "PA-bits:41, VA-bits:57, 4K pages", + [VM_MODE_P41V48_4K] = "PA-bits:41, VA-bits:48, 4K pages", + [VM_MODE_P41V39_4K] = "PA-bits:41, VA-bits:39, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -236,13 +237,23 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, - [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_PXXVYY_4K] = { 0, 0, 0x1000, 12 }, [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 }, [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, + [VM_MODE_P56V57_4K] = { 56, 57, 0x1000, 12 }, + [VM_MODE_P56V48_4K] = { 56, 48, 0x1000, 12 }, + [VM_MODE_P56V39_4K] = { 56, 39, 0x1000, 12 }, + [VM_MODE_P50V57_4K] = { 50, 57, 0x1000, 12 }, + [VM_MODE_P50V48_4K] = { 50, 48, 0x1000, 12 }, + [VM_MODE_P50V39_4K] = { 50, 39, 0x1000, 12 }, + [VM_MODE_P41V57_4K] = { 41, 57, 0x1000, 12 }, + [VM_MODE_P41V48_4K] = { 41, 48, 0x1000, 12 }, + [VM_MODE_P41V39_4K] = { 41, 39, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -256,7 +267,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * based on the MSB of the VA. On architectures with this behavior * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1]. */ -__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) +__weak void vm_populate_gva_bitmap(struct kvm_vm *vm) { sparsebit_set_num(vm->vpages_valid, 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); @@ -288,59 +299,77 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Setup mode specific traits. */ switch (vm->mode) { case VM_MODE_P52V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P52V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P48V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P48V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: - vm->pgtable_levels = 4; + vm->mmu.pgtable_levels = 4; break; + case VM_MODE_P47V47_16K: case VM_MODE_P36V47_16K: - vm->pgtable_levels = 3; + vm->mmu.pgtable_levels = 3; break; - case VM_MODE_PXXV48_4K: + case VM_MODE_PXXVYY_4K: #ifdef __x86_64__ kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); kvm_init_vm_address_properties(vm); - /* - * Ignore KVM support for 5-level paging (vm->va_bits == 57), - * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this mode (48-bit virtual address space). - */ - TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, - "Linear address width (%d bits) not supported", - vm->va_bits); + pr_debug("Guest physical address width detected: %d\n", vm->pa_bits); - vm->pgtable_levels = 4; - vm->va_bits = 48; + pr_debug("Guest virtual address width detected: %d\n", + vm->va_bits); + + if (vm->va_bits == 57) { + vm->mmu.pgtable_levels = 5; + } else { + TEST_ASSERT(vm->va_bits == 48, + "Unexpected guest virtual address width: %d", + vm->va_bits); + vm->mmu.pgtable_levels = 4; + } #else - TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); + TEST_FAIL("VM_MODE_PXXVYY_4K not supported on non-x86 platforms"); #endif break; case VM_MODE_P47V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; break; case VM_MODE_P44V64_4K: - vm->pgtable_levels = 5; + vm->mmu.pgtable_levels = 5; + break; + case VM_MODE_P56V57_4K: + case VM_MODE_P50V57_4K: + case VM_MODE_P41V57_4K: + vm->mmu.pgtable_levels = 5; + break; + case VM_MODE_P56V48_4K: + case VM_MODE_P50V48_4K: + case VM_MODE_P41V48_4K: + vm->mmu.pgtable_levels = 4; + break; + case VM_MODE_P56V39_4K: + case VM_MODE_P50V39_4K: + case VM_MODE_P41V39_4K: + vm->mmu.pgtable_levels = 3; break; default: TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); @@ -356,7 +385,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape) /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); - vm_vaddr_populate_bitmap(vm); + vm_populate_gva_bitmap(vm); /* Limit physical addresses to PA-bits. */ vm->max_gfn = vm_compute_max_gfn(vm); @@ -367,12 +396,12 @@ struct kvm_vm *____vm_create(struct vm_shape shape) return vm; } -static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, - uint32_t nr_runnable_vcpus, - uint64_t extra_mem_pages) +static u64 vm_nr_pages_required(enum vm_guest_mode mode, + u32 nr_runnable_vcpus, + u64 extra_mem_pages) { - uint64_t page_size = vm_guest_mode_params[mode].page_size; - uint64_t nr_pages; + u64 page_size = vm_guest_mode_params[mode].page_size; + u64 nr_pages; TEST_ASSERT(nr_runnable_vcpus, "Use vm_create_barebones() for VMs that _never_ have vCPUs"); @@ -406,21 +435,72 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages) +void kvm_set_files_rlimit(u32 nr_vcpus) +{ + /* + * Each vCPU will open two file descriptors: the vCPU itself and the + * vCPU's binary stats file descriptor. Add an arbitrary amount of + * buffer for all other files a test may open. + */ + int nr_fds_wanted = nr_vcpus * 2 + 100; + struct rlimit rl; + + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + + rl.rlim_max = nr_fds_wanted; + __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)", + old_rlim_max, nr_fds_wanted); + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + +} + +static bool is_guest_memfd_required(struct vm_shape shape) +{ +#ifdef __x86_64__ + return shape.type == KVM_X86_SNP_VM; +#else + return false; +#endif +} + +struct kvm_vm *__vm_create(struct vm_shape shape, u32 nr_runnable_vcpus, + u64 nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, + u64 nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; - int i; + int i, flags; + + kvm_set_files_rlimit(nr_runnable_vcpus); pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, vm_guest_mode_string(shape.mode), shape.type, nr_pages); vm = ____vm_create(shape); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + /* + * Force GUEST_MEMFD for the primary memory region if necessary, e.g. + * for CoCo VMs that require GUEST_MEMFD backed private memory. + */ + flags = 0; + if (is_guest_memfd_required(shape)) + flags |= KVM_MEM_GUEST_MEMFD; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags); for (i = 0; i < NR_MEM_REGIONS; i++) vm->memslots[i] = 0; @@ -442,7 +522,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, guest_rng = new_guest_random_state(guest_random_seed); sync_global_to_guest(vm, guest_rng); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, nr_runnable_vcpus); return vm; } @@ -466,8 +546,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, u32 nr_vcpus, + u64 extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; @@ -480,12 +560,13 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); + kvm_arch_vm_finalize_vcpus(vm); return vm; } struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, + u64 extra_mem_pages, void *guest_code) { struct kvm_vcpu *vcpus[1]; @@ -533,7 +614,7 @@ void kvm_vm_restart(struct kvm_vm *vmp) } __weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) + u32 vcpu_id) { return __vm_vcpu_add(vm, vcpu_id); } @@ -545,20 +626,19 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } -static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) +static u32 parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) { - uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); + u32 pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), "Not allowed to run on pCPU '%d', check cgroups?", pcpu); @@ -582,7 +662,7 @@ void kvm_print_vcpu_pinning_help(void) " (default: no pinning)\n", name, name); } -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], +void kvm_parse_vcpu_pinning(const char *pcpus_string, u32 vcpu_to_pcpu[], int nr_vcpus) { cpu_set_t allowed_mask; @@ -607,7 +687,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -635,15 +715,15 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], * region exists. */ static struct userspace_mem_region * -userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) +userspace_mem_region_find(struct kvm_vm *vm, u64 start, u64 end) { struct rb_node *node; for (node = vm->regions.gpa_tree.rb_node; node; ) { struct userspace_mem_region *region = container_of(node, struct userspace_mem_region, gpa_node); - uint64_t existing_start = region->region.guest_phys_addr; - uint64_t existing_end = region->region.guest_phys_addr + u64 existing_start = region->region.guest_phys_addr; + u64 existing_end = region->region.guest_phys_addr + region->region.memory_size - 1; if (start <= existing_end && end >= existing_start) return region; @@ -657,6 +737,20 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } +static void kvm_stats_release(struct kvm_binary_stats *stats) +{ + if (stats->fd < 0) + return; + + if (stats->desc) { + free(stats->desc); + stats->desc = NULL; + } + + kvm_close(stats->fd); + stats->fd = -1; +} + __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -676,19 +770,15 @@ __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) */ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { - int ret; - if (vcpu->dirty_gfns) { - ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->dirty_gfns, vm->dirty_ring_size); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->run, vcpu_mmap_sz()); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(vcpu->run, vcpu_mmap_sz()); - ret = close(vcpu->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vcpu->fd); + kvm_stats_release(&vcpu->stats); list_del(&vcpu->list); @@ -699,35 +789,32 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) void kvm_vm_release(struct kvm_vm *vmp) { struct kvm_vcpu *vcpu, *tmp; - int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); - ret = close(vmp->fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + kvm_close(vmp->fd); + kvm_close(vmp->kvm_fd); - ret = close(vmp->kvm_fd); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); + /* Free cached stats metadata and close FD */ + kvm_stats_release(&vmp->stats); + + kvm_arch_vm_release(vmp); } static void __vm_mem_region_delete(struct kvm_vm *vm, struct userspace_mem_region *region) { - int ret; - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); - ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_start, region->mmap_size); if (region->fd >= 0) { /* There's an extra map when using shared memory. */ - ret = munmap(region->mmap_alias, region->mmap_size); - TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + kvm_munmap(region->mmap_alias, region->mmap_size); close(region->fd); } if (region->region.guest_memfd >= 0) @@ -748,12 +835,6 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; - /* Free cached stats metadata and close FD */ - if (vmp->stats_fd) { - free(vmp->stats_desc); - close(vmp->stats_fd); - } - /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region); @@ -771,7 +852,7 @@ void kvm_vm_free(struct kvm_vm *vmp) int kvm_memfd_alloc(size_t size, bool hugepages) { int memfd_flags = MFD_CLOEXEC; - int fd, r; + int fd; if (hugepages) memfd_flags |= MFD_HUGETLB; @@ -779,11 +860,8 @@ int kvm_memfd_alloc(size_t size, bool hugepages) fd = memfd_create("kvm_selftest", memfd_flags); TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); - r = ftruncate(fd, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); - - r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_ftruncate(fd, size); + kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); return fd; } @@ -840,8 +918,8 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, } -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) +int __vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva) { struct kvm_userspace_memory_region region = { .slot = slot, @@ -854,8 +932,8 @@ int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion); } -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva) +void vm_set_user_memory_region(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva) { int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva); @@ -867,9 +945,9 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) +int __vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset) { struct kvm_userspace_memory_region2 region = { .slot = slot, @@ -886,9 +964,9 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); } -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset) +void vm_set_user_memory_region2(struct kvm_vm *vm, u32 slot, u32 flags, + gpa_t gpa, u64 size, void *hva, + u32 guest_memfd, u64 guest_memfd_offset) { int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, guest_memfd, guest_memfd_offset); @@ -900,14 +978,14 @@ void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags /* FIXME: This thing needs to be ripped apart and rewritten. */ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) + gpa_t gpa, u32 slot, u64 npages, u32 flags, + int guest_memfd, u64 guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); size_t mem_size = npages * vm->page_size; - size_t alignment; + size_t alignment = 1; TEST_REQUIRE_SET_USER_MEMORY_REGION2(); @@ -915,32 +993,31 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); - TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + TEST_ASSERT((gpa % vm->page_size) == 0, "Guest physical " "address not on a page boundary.\n" - " guest_paddr: 0x%lx vm->page_size: 0x%x", - guest_paddr, vm->page_size); - TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + " gpa: 0x%lx vm->page_size: 0x%x", + gpa, vm->page_size); + TEST_ASSERT((((gpa >> vm->page_shift) + npages) - 1) <= vm->max_gfn, "Physical range beyond maximum " "supported physical address,\n" - " guest_paddr: 0x%lx npages: 0x%lx\n" + " gpa: 0x%lx npages: 0x%lx\n" " vm->max_gfn: 0x%lx vm->page_size: 0x%x", - guest_paddr, npages, vm->max_gfn, vm->page_size); + gpa, npages, vm->max_gfn, vm->page_size); /* * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); + vm, gpa, (gpa + npages * vm->page_size) - 1); if (region != NULL) TEST_FAIL("overlapping userspace_mem_region already " "exists\n" - " requested guest_paddr: 0x%lx npages: 0x%lx " - "page_size: 0x%x\n" - " existing guest_paddr: 0x%lx size: 0x%lx", - guest_paddr, npages, vm->page_size, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + " requested gpa: 0x%lx npages: 0x%lx page_size: 0x%x\n" + " existing gpa: 0x%lx size: 0x%lx", + gpa, npages, vm->page_size, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); /* Confirm no region with the requested slot already exists. */ hash_for_each_possible(vm->regions.slot_hash, region, slot_node, @@ -950,12 +1027,11 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_FAIL("A mem region with the requested slot " "already exists.\n" - " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" - " existing slot: %u paddr: 0x%lx size: 0x%lx", - slot, guest_paddr, npages, - region->region.slot, - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size); + " requested slot: %u gpa: 0x%lx npages: 0x%lx\n" + " existing slot: %u gpa: 0x%lx size: 0x%lx", + slot, gpa, npages, region->region.slot, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size); } /* Allocate and initialize new mem region structure. */ @@ -963,13 +1039,6 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, TEST_ASSERT(region != NULL, "Insufficient Memory"); region->mmap_size = mem_size; -#ifdef __s390x__ - /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ - alignment = 0x100000; -#else - alignment = 1; -#endif - /* * When using THP mmap is not guaranteed to returned a hugepage aligned * address so we have to pad the mmap. Padding is not needed for HugeTLB @@ -979,7 +1048,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(gpa, align_up(gpa, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) @@ -990,12 +1059,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->fd = kvm_memfd_alloc(region->mmap_size, src_type == VM_MEM_SRC_SHARED_HUGETLB); - region->mmap_start = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_start != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_start = kvm_mmap(region->mmap_size, PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1019,7 +1085,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, if (flags & KVM_MEM_GUEST_MEMFD) { if (guest_memfd < 0) { - uint32_t guest_memfd_flags = 0; + u32 guest_memfd_flags = 0; TEST_ASSERT(!guest_memfd_offset, "Offset must be zero when creating new guest_memfd"); guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); @@ -1030,8 +1096,7 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, * needing to track if the fd is owned by the framework * or by the caller. */ - guest_memfd = dup(guest_memfd); - TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + guest_memfd = kvm_dup(guest_memfd); } region->region.guest_memfd = guest_memfd; @@ -1043,20 +1108,18 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, region->unused_phy_pages = sparsebit_alloc(); if (vm_arch_has_protected_memory(vm)) region->protected_phy_pages = sparsebit_alloc(); - sparsebit_set_num(region->unused_phy_pages, - guest_paddr >> vm->page_shift, npages); + sparsebit_set_num(region->unused_phy_pages, gpa >> vm->page_shift, npages); region->region.slot = slot; region->region.flags = flags; - region->region.guest_phys_addr = guest_paddr; + region->region.guest_phys_addr = gpa; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", - ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size, + " guest_phys_addr: 0x%lx size: 0x%llx guest_memfd: %d", + ret, errno, slot, flags, gpa, region->region.memory_size, region->region.guest_memfd); /* Add to quick lookup data structures */ @@ -1066,12 +1129,10 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, /* If shared memory, create an alias. */ if (region->fd >= 0) { - region->mmap_alias = mmap(NULL, region->mmap_size, - PROT_READ | PROT_WRITE, - vm_mem_backing_src_alias(src_type)->flag, - region->fd, 0); - TEST_ASSERT(region->mmap_alias != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + region->mmap_alias = kvm_mmap(region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1080,10 +1141,9 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, - uint64_t npages, uint32_t flags) + gpa_t gpa, u32 slot, u64 npages, u32 flags) { - vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); + vm_mem_add(vm, src_type, gpa, slot, npages, flags, -1, 0); } /* @@ -1102,7 +1162,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * memory slot ID). */ struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot) +memslot2region(struct kvm_vm *vm, u32 memslot) { struct userspace_mem_region *region; @@ -1133,7 +1193,7 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) * Sets the flags of the memory region specified by the value of slot, * to the values given by flags. */ -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) +void vm_mem_region_set_flags(struct kvm_vm *vm, u32 slot, u32 flags) { int ret; struct userspace_mem_region *region; @@ -1149,6 +1209,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } +void vm_mem_region_reload(struct kvm_vm *vm, u32 slot) +{ + struct userspace_mem_region *region = memslot2region(vm, slot); + struct kvm_userspace_memory_region2 tmp = region->region; + + tmp.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); +} + /* * VM Memory Region Move * @@ -1163,7 +1233,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) * * Change the gpa of a memory region. */ -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) +void vm_mem_region_move(struct kvm_vm *vm, u32 slot, u64 new_gpa) { struct userspace_mem_region *region; int ret; @@ -1192,7 +1262,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) * * Delete a memory region. */ -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) +void vm_mem_region_delete(struct kvm_vm *vm, u32 slot) { struct userspace_mem_region *region = memslot2region(vm, slot); @@ -1202,18 +1272,18 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, region); } -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, +void vm_guest_mem_fallocate(struct kvm_vm *vm, u64 base, u64 size, bool punch_hole) { const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); struct userspace_mem_region *region; - uint64_t end = base + size; - uint64_t gpa, len; + u64 end = base + size; + gpa_t gpa, len; off_t fd_offset; int ret; for (gpa = base; gpa < end; gpa += len) { - uint64_t offset; + u64 offset; region = userspace_mem_region_find(vm, gpa, gpa); TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, @@ -1221,7 +1291,7 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, offset = gpa - region->region.guest_phys_addr; fd_offset = region->region.guest_memfd_offset + offset; - len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + len = min_t(u64, end - gpa, region->region.memory_size - offset); ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", @@ -1231,14 +1301,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1246,7 +1316,7 @@ static int vcpu_mmap_sz(void) return ret; } -static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) +static bool vcpu_exists(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1262,7 +1332,7 @@ static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. * No additional vCPU setup is done. Returns the vCPU. */ -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu; @@ -1279,12 +1349,15 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); - vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), - PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->run != MAP_FAILED, - __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); + vcpu->run = kvm_mmap(vcpu_mmap_sz(), PROT_READ | PROT_WRITE, + MAP_SHARED, vcpu->fd); + + if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD)) + vcpu->stats.fd = vcpu_get_stats_fd(vcpu); + else + vcpu->stats.fd = -1; /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); @@ -1293,33 +1366,18 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) } /* - * VM Virtual Address Unused Gap - * - * Input Args: - * vm - Virtual Machine - * sz - Size (bytes) - * vaddr_min - Minimum Virtual Address - * - * Output Args: None - * - * Return: - * Lowest virtual address at or below vaddr_min, with at least - * sz unused bytes. TEST_ASSERT failure if no area of at least - * size sz is available. - * - * Within the VM specified by vm, locates the lowest starting virtual - * address >= vaddr_min, that has at least sz unallocated bytes. A + * Within the VM specified by @vm, locates the lowest starting guest virtual + * address >= @min_gva, that has at least @sz unallocated bytes. A * TEST_ASSERT failure occurs for invalid input or no area of at least - * sz unallocated bytes >= vaddr_min is available. + * @sz unallocated bytes >= @min_gva is available. */ -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) +gva_t vm_unused_gva_gap(struct kvm_vm *vm, size_t sz, gva_t min_gva) { - uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; + u64 pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ - uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; - if ((pgidx_start * vm->page_size) < vaddr_min) + u64 pgidx_start = (min_gva + vm->page_size - 1) >> vm->page_shift; + if ((pgidx_start * vm->page_size) < min_gva) goto no_va_found; /* Loop over section with enough valid virtual page indexes. */ @@ -1356,7 +1414,7 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, } while (pgidx_start != 0); no_va_found: - TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages); + TEST_FAIL("No gva of specified pages available, pages: 0x%lx", pages); /* NOT REACHED */ return -1; @@ -1378,148 +1436,91 @@ va_found: return pgidx_start * vm->page_size; } -static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type, - bool protected) +static gva_t ____vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type, bool protected) { - uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); + u64 pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); - vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, - vm->memslots[type], protected); + gpa_t gpa = __vm_phy_pages_alloc(vm, pages, + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type], protected); /* * Find an unused range of virtual page addresses of at least * pages in length. */ - vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + gva_t gva_start = vm_unused_gva_gap(vm, sz, min_gva); /* Map the virtual pages. */ - for (vm_vaddr_t vaddr = vaddr_start; pages > 0; - pages--, vaddr += vm->page_size, paddr += vm->page_size) { - - virt_pg_map(vm, vaddr, paddr); + for (gva_t gva = gva_start; pages > 0; + pages--, gva += vm->page_size, gpa += vm->page_size) { - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + virt_pg_map(vm, gva, gpa); } - return vaddr_start; + return gva_start; } -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t __vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, - vm_arch_has_protected_memory(vm)); + return ____vm_alloc(vm, sz, min_gva, type, + vm_arch_has_protected_memory(vm)); } -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type) +gva_t vm_alloc_shared(struct kvm_vm *vm, size_t sz, gva_t min_gva, + enum kvm_mem_region_type type) { - return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false); + return ____vm_alloc(vm, sz, min_gva, type, false); } /* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. The allocated physical space comes from the TEST_DATA memory region. + * Allocates at least sz bytes within the virtual address space of the VM + * given by @vm. The allocated bytes are mapped to a virtual address >= the + * address given by @min_gva. Note that each allocation uses a a unique set + * of pages, with the minimum real allocation being at least a page. The + * allocated physical space comes from the TEST_DATA memory region. */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +gva_t vm_alloc(struct kvm_vm *vm, size_t sz, gva_t min_gva) { - return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); + return __vm_alloc(vm, sz, min_gva, MEM_REGION_TEST_DATA); } -/* - * VM Virtual Address Allocate Pages - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least N system pages worth of bytes within the virtual address - * space of the vm. - */ -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) +gva_t vm_alloc_pages(struct kvm_vm *vm, int nr_pages) { - return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); + return vm_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +gva_t __vm_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) { - return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); + return __vm_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); } -/* - * VM Virtual Address Allocate Page - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least one system page worth of bytes within the virtual address - * space of the vm. - */ -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) +gva_t vm_alloc_page(struct kvm_vm *vm) { - return vm_vaddr_alloc_pages(vm, 1); + return vm_alloc_pages(vm, 1); } /* - * Map a range of VM virtual address to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * vaddr - Virtuall address to map - * paddr - VM Physical Address - * npages - The number of pages to map + * Map a range of VM virtual address to the VM's physical address. * - * Output Args: None - * - * Return: None - * - * Within the VM given by @vm, creates a virtual translation for - * @npages starting at @vaddr to the page range starting at @paddr. + * Within the VM given by @vm, creates a virtual translation for @npages + * starting at @gva to the page range starting at @gpa. */ -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages) +void virt_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa, unsigned int npages) { size_t page_size = vm->page_size; size_t size = npages * page_size; - TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + TEST_ASSERT(gva + size > gva, "Vaddr overflow"); + TEST_ASSERT(gpa + size > gpa, "Paddr overflow"); while (npages--) { - virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); + virt_pg_map(vm, gva, gpa); - vaddr += page_size; - paddr += page_size; + gva += page_size; + gpa += page_size; } } @@ -1540,7 +1541,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * address providing the memory to the vm physical address is returned. * A TEST_ASSERT failure occurs if no region containing gpa exists. */ -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2hva(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; @@ -1573,7 +1574,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) * VM physical address is returned. A TEST_ASSERT failure occurs if no * region containing hva exists. */ -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) +gpa_t addr_hva2gpa(struct kvm_vm *vm, void *hva) { struct rb_node *node; @@ -1584,7 +1585,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) if (hva >= region->host_mem) { if (hva <= (region->host_mem + region->region.memory_size - 1)) - return (vm_paddr_t)((uintptr_t) + return (gpa_t)((uintptr_t) region->region.guest_phys_addr + (hva - (uintptr_t)region->host_mem)); @@ -1616,7 +1617,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) * memory without mapping said memory in the guest's address space. And, for * userfaultfd-based demand paging, to do so without triggering userfaults. */ -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) +void *addr_gpa2alias(struct kvm_vm *vm, gpa_t gpa) { struct userspace_mem_region *region; uintptr_t offset; @@ -1635,7 +1636,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } @@ -1699,8 +1711,8 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - uint32_t page_size = getpagesize(); - uint32_t size = vcpu->vm->dirty_ring_size; + u32 page_size = getpagesize(); + u32 size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); @@ -1715,9 +1727,8 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, - page_size * KVM_DIRTY_LOG_PAGE_OFFSET); - TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); + addr = __kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); vcpu->dirty_gfns = addr; vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn); @@ -1730,7 +1741,7 @@ void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) * Device Ioctl */ -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +int __kvm_has_device_attr(int dev_fd, u32 group, u64 attr) { struct kvm_device_attr attribute = { .group = group, @@ -1741,7 +1752,7 @@ int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); } -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_test_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1751,7 +1762,7 @@ int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); } -int __kvm_create_device(struct kvm_vm *vm, uint64_t type) +int __kvm_create_device(struct kvm_vm *vm, u64 type) { struct kvm_create_device create_dev = { .type = type, @@ -1765,7 +1776,7 @@ int __kvm_create_device(struct kvm_vm *vm, uint64_t type) return err ? : create_dev.fd; } -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_get(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1777,7 +1788,7 @@ int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); } -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) +int __kvm_device_attr_set(int dev_fd, u32 group, u64 attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -1793,7 +1804,7 @@ int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) * IRQ related functions. */ -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +int _kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { struct kvm_irq_level irq_level = { .irq = irq, @@ -1803,7 +1814,7 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); } -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +void kvm_irq_line(struct kvm_vm *vm, u32 irq, int level) { int ret = _kvm_irq_line(vm, irq, level); @@ -1825,7 +1836,7 @@ struct kvm_irq_routing *kvm_gsi_routing_create(void) } void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin) + u32 gsi, u32 pin) { int i; @@ -1875,7 +1886,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) * Dumps the current state of the VM given by vm, to the FILE stream * given by stream. */ -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void vm_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { int ctr; struct userspace_mem_region *region; @@ -1888,8 +1899,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " "host_virt: %p\n", indent + 2, "", - (uint64_t) region->region.guest_phys_addr, - (uint64_t) region->region.memory_size, + (u64)region->region.guest_phys_addr, + (u64)region->region.memory_size, region->host_mem); fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); sparsebit_dump(stream, region->unused_phy_pages, 0); @@ -1901,8 +1912,8 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); sparsebit_dump(stream, vm->vpages_mapped, indent + 2); fprintf(stream, "%*spgd_created: %u\n", indent, "", - vm->pgd_created); - if (vm->pgd_created) { + vm->mmu.pgd_created); + if (vm->mmu.pgd_created) { fprintf(stream, "%*sVirtual Translation Tables:\n", indent + 2, ""); virt_dump(stream, vm, indent + 4); @@ -1958,9 +1969,9 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), + KVM_EXIT_STRING(ARM_SEA), }; /* @@ -1996,7 +2007,7 @@ const char *exit_reason_str(unsigned int exit_reason) * Input Args: * vm - Virtual Machine * num - number of pages - * paddr_min - Physical address minimum + * min_gpa - Physical address minimum * memslot - Memory region to allocate page from * protected - True if the pages will be used as protected/private memory * @@ -2006,29 +2017,29 @@ const char *exit_reason_str(unsigned int exit_reason) * Starting physical address * * Within the VM specified by vm, locates a range of available physical - * pages at or above paddr_min. If found, the pages are marked as in use + * pages at or above min_gpa. If found, the pages are marked as in use * and their base address is returned. A TEST_ASSERT failure occurs if - * not enough pages are available at or above paddr_min. + * not enough pages are available at or above min_gpa. */ -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected) +gpa_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + gpa_t min_gpa, u32 memslot, + bool protected) { struct userspace_mem_region *region; sparsebit_idx_t pg, base; TEST_ASSERT(num > 0, "Must allocate at least one page"); - TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " + TEST_ASSERT((min_gpa % vm->page_size) == 0, "Min physical address " "not divisible by page size.\n" - " paddr_min: 0x%lx page_size: 0x%x", - paddr_min, vm->page_size); + " min_gpa: 0x%lx page_size: 0x%x", + min_gpa, vm->page_size); region = memslot2region(vm, memslot); TEST_ASSERT(!protected || region->protected_phy_pages, "Region doesn't support protected memory"); - base = pg = paddr_min >> vm->page_shift; + base = pg = min_gpa >> vm->page_shift; do { for (; pg < base + num; ++pg) { if (!sparsebit_is_set(region->unused_phy_pages, pg)) { @@ -2040,8 +2051,8 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, if (pg == 0) { fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", - paddr_min, vm->page_size, memslot); + "min_gpa: 0x%lx page_size: 0x%x memslot: %u\n", + min_gpa, vm->page_size, memslot); fputs("---- vm dump ----\n", stderr); vm_dump(stderr, vm, 2); abort(); @@ -2056,13 +2067,12 @@ vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, return base * vm->page_size; } -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot) +gpa_t vm_phy_page_alloc(struct kvm_vm *vm, gpa_t min_gpa, u32 memslot) { - return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); + return vm_phy_pages_alloc(vm, 1, min_gpa, memslot); } -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) +gpa_t vm_alloc_page_table(struct kvm_vm *vm) { return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); @@ -2080,7 +2090,7 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) * Return: * Equivalent host virtual address */ -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) +void *addr_gva2hva(struct kvm_vm *vm, gva_t gva) { return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } @@ -2178,7 +2188,7 @@ struct kvm_stats_desc *read_stats_descriptors(int stats_fd, * Read the data values of a specified stat from the binary stats interface. */ void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, + struct kvm_stats_desc *desc, u64 *data, size_t max_elements) { size_t nr_elements = min_t(ssize_t, desc->size, max_elements); @@ -2198,49 +2208,42 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header, desc->name, size, ret); } -/* - * Read the data of the named stat - * - * Input Args: - * vm - the VM for which the stat should be read - * stat_name - the name of the stat to read - * max_elements - the maximum number of 8-byte values to read into data - * - * Output Args: - * data - the buffer into which stat data should be read - * - * Read the data values of a specified stat from the binary stats interface. - */ -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements) +void kvm_get_stat(struct kvm_binary_stats *stats, const char *name, + u64 *data, size_t max_elements) { struct kvm_stats_desc *desc; size_t size_desc; int i; - if (!vm->stats_fd) { - vm->stats_fd = vm_get_stats_fd(vm); - read_stats_header(vm->stats_fd, &vm->stats_header); - vm->stats_desc = read_stats_descriptors(vm->stats_fd, - &vm->stats_header); + if (!stats->desc) { + read_stats_header(stats->fd, &stats->header); + stats->desc = read_stats_descriptors(stats->fd, &stats->header); } - size_desc = get_stats_descriptor_size(&vm->stats_header); + size_desc = get_stats_descriptor_size(&stats->header); - for (i = 0; i < vm->stats_header.num_desc; ++i) { - desc = (void *)vm->stats_desc + (i * size_desc); + for (i = 0; i < stats->header.num_desc; ++i) { + desc = (void *)stats->desc + (i * size_desc); - if (strcmp(desc->name, stat_name)) + if (strcmp(desc->name, name)) continue; - read_stat_data(vm->stats_fd, &vm->stats_header, desc, - data, max_elements); - - break; + read_stat_data(stats->fd, &stats->header, desc, data, max_elements); + return; } + + TEST_FAIL("Unable to find stat '%s'", name); +} + +__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) +{ } -__weak void kvm_arch_vm_post_create(struct kvm_vm *vm) +__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm) +{ +} + +__weak void kvm_arch_vm_release(struct kvm_vm *vm) { } @@ -2248,18 +2251,42 @@ __weak void kvm_selftest_arch_init(void) { } +static void report_unexpected_signal(int signum) +{ +#define KVM_CASE_SIGNUM(sig) \ + case sig: TEST_FAIL("Unexpected " #sig " (%d)\n", signum) + + switch (signum) { + KVM_CASE_SIGNUM(SIGBUS); + KVM_CASE_SIGNUM(SIGSEGV); + KVM_CASE_SIGNUM(SIGILL); + KVM_CASE_SIGNUM(SIGFPE); + default: + TEST_FAIL("Unexpected signal %d\n", signum); + } +} + void __attribute((constructor)) kvm_selftest_init(void) { + struct sigaction sig_sa = { + .sa_handler = report_unexpected_signal, + }; + /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + sigaction(SIGBUS, &sig_sa, NULL); + sigaction(SIGSEGV, &sig_sa, NULL); + sigaction(SIGILL, &sig_sa, NULL); + sigaction(SIGFPE, &sig_sa, NULL); + guest_random_seed = last_guest_seed = random(); pr_info("Random seed: 0x%x\n", guest_random_seed); kvm_selftest_arch_init(); } -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) +bool vm_is_gpa_protected(struct kvm_vm *vm, gpa_t gpa) { sparsebit_idx_t pg = 0; struct userspace_mem_region *region; @@ -2267,9 +2294,14 @@ bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr) if (!vm_arch_has_protected_memory(vm)) return false; - region = userspace_mem_region_find(vm, paddr, paddr); - TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr); + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region, "No vm physical memory at 0x%lx", gpa); - pg = paddr >> vm->page_shift; + pg = gpa >> vm->page_shift; return sparsebit_is_set(region->protected_phy_pages, pg); } + +__weak bool kvm_arch_has_default_irqchip(void) +{ + return false; +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S new file mode 100644 index 000000000000..3f1e4b67c5ae --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "processor.h" + +/* address of refill exception should be 4K aligned */ +.balign 4096 +.global handle_tlb_refill +handle_tlb_refill: + csrwr t0, LOONGARCH_CSR_TLBRSAVE + csrrd t0, LOONGARCH_CSR_PGD + lddir t0, t0, 3 + lddir t0, t0, 1 + ldpte t0, 0 + ldpte t0, 1 + tlbfill + csrrd t0, LOONGARCH_CSR_TLBRSAVE + ertn + + /* + * save and restore all gprs except base register, + * and default value of base register is sp ($r3). + */ +.macro save_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + st.d $r\n, \base, 8 * \n + .endr +.endm + +.macro restore_gprs base + .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + ld.d $r\n, \base, 8 * \n + .endr +.endm + +/* address of general exception should be 4K aligned */ +.balign 4096 +.global handle_exception +handle_exception: + csrwr sp, LOONGARCH_CSR_KS0 + csrrd sp, LOONGARCH_CSR_KS1 + addi.d sp, sp, -EXREGS_SIZE + + save_gprs sp + /* save sp register to stack */ + csrrd t0, LOONGARCH_CSR_KS0 + st.d t0, sp, 3 * 8 + + csrrd t0, LOONGARCH_CSR_ERA + st.d t0, sp, PC_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_ESTAT + st.d t0, sp, ESTAT_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_BADV + st.d t0, sp, BADV_OFFSET_EXREGS + csrrd t0, LOONGARCH_CSR_PRMD + st.d t0, sp, PRMD_OFFSET_EXREGS + + or a0, sp, zero + bl route_exception + ld.d t0, sp, PC_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_ERA + ld.d t0, sp, PRMD_OFFSET_EXREGS + csrwr t0, LOONGARCH_CSR_PRMD + restore_gprs sp + csrrd sp, LOONGARCH_CSR_KS0 + ertn diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c new file mode 100644 index 000000000000..64d91fb76522 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <assert.h> +#include <linux/compiler.h> + +#include <asm/kvm.h> +#include "kvm_util.h" +#include "pmu.h" +#include "processor.h" +#include "ucall_common.h" + +#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000 +#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000 + +static gpa_t invalid_pgtable[4]; +static gva_t exception_handlers; + +static u64 virt_pte_index(struct kvm_vm *vm, gva_t gva, int level) +{ + unsigned int shift; + u64 mask; + + shift = level * (vm->page_shift - 3) + vm->page_shift; + mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> shift) & mask; +} + +static u64 pte_addr(struct kvm_vm *vm, u64 entry) +{ + return entry & ~((0x1UL << vm->page_shift) - 1); +} + +static u64 ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +static void virt_set_pgtable(struct kvm_vm *vm, gpa_t table, gpa_t child) +{ + u64 *ptep; + int i, ptrs_per_pte; + + ptep = addr_gpa2hva(vm, table); + ptrs_per_pte = 1 << (vm->page_shift - 3); + for (i = 0; i < ptrs_per_pte; i++) + WRITE_ONCE(*(ptep + i), child); +} + +void virt_arch_pgd_alloc(struct kvm_vm *vm) +{ + int i; + gpa_t child, table; + + if (vm->mmu.pgd_created) + return; + + child = table = 0; + for (i = 0; i < vm->mmu.pgtable_levels; i++) { + invalid_pgtable[i] = child; + table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN, + vm->memslots[MEM_REGION_PT]); + TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i); + virt_set_pgtable(vm, table, child); + child = table; + } + vm->mmu.pgd = table; + vm->mmu.pgd_created = true; +} + +static int virt_pte_none(u64 *ptep, int level) +{ + return *ptep == invalid_pgtable[level]; +} + +static u64 *virt_populate_pte(struct kvm_vm *vm, gva_t gva, int alloc) +{ + int level; + u64 *ptep; + gpa_t child; + + if (!vm->mmu.pgd_created) + goto unmapped_gva; + + child = vm->mmu.pgd; + level = vm->mmu.pgtable_levels - 1; + while (level > 0) { + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + if (virt_pte_none(ptep, level)) { + if (alloc) { + child = vm_alloc_page_table(vm); + virt_set_pgtable(vm, child, invalid_pgtable[level - 1]); + WRITE_ONCE(*ptep, child); + } else + goto unmapped_gva; + + } else + child = pte_addr(vm, *ptep); + level--; + } + + ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8; + return ptep; + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); + exit(EXIT_FAILURE); +} + +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) +{ + u64 *ptep; + + ptep = virt_populate_pte(vm, gva, 0); + TEST_ASSERT(*ptep != 0, "Virtual address gva: 0x%lx not mapped\n", gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); +} + +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) +{ + u32 prot_bits; + u64 *ptep; + + TEST_ASSERT((gva % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + "gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, + "Physical address not on page boundary,\n" + "gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + "gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + + ptep = virt_populate_pte(vm, gva, 1); + prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER; + WRITE_ONCE(*ptep, gpa | prot_bits); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, u64 page, int level) +{ + u64 pte, *ptep; + static const char * const type[] = { "pte", "pmd", "pud", "pgd"}; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (virt_pte_none(ptep, level)) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", + indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--); + } +} + +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) +{ + int level; + + if (!vm->mmu.pgd_created) + return; + + level = vm->mmu.pgtable_levels - 1; + pte_dump(stream, vm, indent, vm->mmu.pgd, level); +} + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) +{ +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) + return; + + TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)", + uc.args[0], uc.args[1], uc.args[2]); +} + +void route_exception(struct ex_regs *regs) +{ + int vector; + unsigned long pc, estat, badv; + struct handlers *handlers; + + handlers = (struct handlers *)exception_handlers; + vector = (regs->estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT; + if (handlers && handlers->exception_handlers[vector]) + return handlers->exception_handlers[vector](regs); + + pc = regs->pc; + badv = regs->badv; + estat = regs->estat; + ucall(UCALL_UNHANDLED, 3, pc, estat, badv); + while (1) ; +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + void *addr; + + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), + LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + + addr = addr_gva2hva(vm, vm->handlers); + memset(addr, 0, vm->page_size); + exception_handlers = vm->handlers; + sync_global_to_guest(vm, exception_handlers); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, handler_fn handler) +{ + struct handlers *handlers = addr_gva2hva(vm, vm->handlers); + + assert(vector < VECTOR_NUM); + handlers->exception_handlers[vector] = handler; +} + +u32 guest_get_vcpuid(void) +{ + return csr_read(LOONGARCH_CSR_CPUID); +} + +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) +{ + int i; + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + "num: %u\n", num); + + vcpu_regs_get(vcpu, ®s); + + va_start(ap, num); + for (i = 0; i < num; i++) + regs.gpr[i + 4] = va_arg(ap, u64); + va_end(ap); + + vcpu_regs_set(vcpu, ®s); +} + +static void loongarch_set_reg(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + __vcpu_set_reg(vcpu, id, val); +} + +static void loongarch_set_cpucfg(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + u64 cfgid; + + cfgid = KVM_REG_LOONGARCH_CPUCFG | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, cfgid, val); +} + +static void loongarch_get_csr(struct kvm_vcpu *vcpu, u64 id, void *addr) +{ + u64 csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_get_reg(vcpu, csrid, addr); +} + +static void loongarch_set_csr(struct kvm_vcpu *vcpu, u64 id, u64 val) +{ + u64 csrid; + + csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id; + __vcpu_set_reg(vcpu, csrid, val); +} + +void loongarch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + int width; + unsigned int cfg; + unsigned long val; + struct kvm_vm *vm = vcpu->vm; + + switch (vm->mode) { + case VM_MODE_P36V47_16K: + case VM_MODE_P47V47_16K: + break; + + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + cfg = read_cpucfg(LOONGARCH_CPUCFG6); + loongarch_set_cpucfg(vcpu, LOONGARCH_CPUCFG6, cfg); + + /* kernel mode and page enable mode */ + val = PLV_KERN | CSR_CRMD_PG; + loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0); + loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1); + + /* time count start from 0 */ + val = 0; + loongarch_set_reg(vcpu, KVM_REG_LOONGARCH_COUNTER, val); + + width = vm->page_shift - 3; + + switch (vm->mmu.pgtable_levels) { + case 4: + /* pud page shift and width */ + val = (vm->page_shift + width * 2) << 20 | (width << 25); + /* fall throuth */ + case 3: + /* pmd page shift and width */ + val |= (vm->page_shift + width) << 10 | (width << 15); + /* pte page shift and width */ + val |= vm->page_shift | width << 5; + break; + default: + TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->mmu.pgtable_levels); + } + + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val); + + /* PGD page shift and width */ + val = (vm->page_shift + width * (vm->mmu.pgtable_levels - 1)) | width << 6; + loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val); + loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->mmu.pgd); + + /* + * Refill exception runs on real mode + * Entry address should be physical address + */ + val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val); + + /* + * General exception runs on page-enabled mode + * Entry address should be virtual address + */ + val = (unsigned long)handle_exception; + loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val); + val &= ~CSR_TLBIDX_SIZEM; + val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE); + + /* LOONGARCH_CSR_KS1 is used for exception stack */ + val = __vm_alloc(vm, vm->page_size, LOONGARCH_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + TEST_ASSERT(val != 0, "No memory for exception stack"); + val = val + vm->page_size; + loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val); + + loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val); + val &= ~CSR_TLBREHI_PS; + val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT; + loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val); + + loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id); + loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id); +} + +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) +{ + size_t stack_size; + u64 stack_gva; + struct kvm_regs regs; + struct kvm_vcpu *vcpu; + + vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size; + stack_gva = __vm_alloc(vm, stack_size, + LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); + TEST_ASSERT(stack_gva != 0, "No memory for vm stack"); + + loongarch_vcpu_setup(vcpu); + /* Setup guest general purpose registers */ + vcpu_regs_get(vcpu, ®s); + regs.gpr[3] = stack_gva + stack_size; + vcpu_regs_set(vcpu, ®s); + + return vcpu; +} + +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) +{ + struct kvm_regs regs; + + /* Setup guest PC register */ + vcpu_regs_get(vcpu, ®s); + regs.pc = (u64)guest_code; + vcpu_regs_set(vcpu, ®s); +} diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c new file mode 100644 index 000000000000..cd49a3440ead --- /dev/null +++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + */ +#include "kvm_util.h" + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +gva_t *ucall_exit_mmio_addr; + +void ucall_arch_init(struct kvm_vm *vm, gpa_t mmio_gpa) +{ + gva_t mmio_gva = vm_unused_gva_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR); + + virt_map(vm, mmio_gva, mmio_gpa, 1); + + vm->ucall_mmio_addr = mmio_gpa; + + write_guest_global(vm, ucall_exit_mmio_addr, (gva_t *)mmio_gva); +} + +void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) { + TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(u64), + "Unexpected ucall exit mmio address access"); + + return (void *)(*((u64 *)run->mmio.data)); + } + + return NULL; +} diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c new file mode 100644 index 000000000000..46a14fd63d9e --- /dev/null +++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + */ + +#include <time.h> + +#include "lru_gen_util.h" + +/* + * Tracks state while we parse memcg lru_gen stats. The file we're parsing is + * structured like this (some extra whitespace elided): + * + * memcg (id) (path) + * node (id) + * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages) + */ +struct memcg_stats_parse_context { + bool consumed; /* Whether or not this line was consumed */ + /* Next parse handler to invoke */ + void (*next_handler)(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + int current_node_idx; /* Current index in nodes array */ + const char *name; /* The name of the memcg we're looking for */ +}; + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line); + +struct split_iterator { + char *str; + char *save; +}; + +static char *split_next(struct split_iterator *it) +{ + char *ret = strtok_r(it->str, " \t\n\r", &it->save); + + it->str = NULL; + return ret; +} + +static void memcg_stats_handle_searching(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *memcg_id = split_next(&it); + char *memcg_name = split_next(&it); + char *end; + + ctx->consumed = true; + + if (!prefix || strcmp("memcg", prefix)) + return; /* Not a memcg line (maybe empty), skip */ + + TEST_ASSERT(memcg_id && memcg_name, + "malformed memcg line; no memcg id or memcg_name"); + + if (strcmp(memcg_name + 1, ctx->name)) + return; /* Wrong memcg, skip */ + + /* Found it! */ + + stats->memcg_id = strtoul(memcg_id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id); + if (!stats->memcg_id) + return; /* Removed memcg? */ + + ctx->next_handler = memcg_stats_handle_in_memcg; +} + +static void memcg_stats_handle_in_memcg(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + struct split_iterator it = { .str = line }; + char *prefix = split_next(&it); + char *id = split_next(&it); + long found_node_id; + char *end; + + ctx->consumed = true; + ctx->current_node_idx = -1; + + if (!prefix) + return; /* Skip empty lines */ + + if (!strcmp("memcg", prefix)) { + /* Memcg done, found next one; stop. */ + ctx->next_handler = NULL; + return; + } else if (strcmp("node", prefix)) + TEST_ASSERT(false, "found malformed line after 'memcg ...'," + "token: '%s'", prefix); + + /* At this point we know we have a node line. Parse the ID. */ + + TEST_ASSERT(id, "malformed node line; no node id"); + + found_node_id = strtol(id, &end, 10); + TEST_ASSERT(*end == '\0', "malformed node id '%s'", id); + + ctx->current_node_idx = stats->nr_nodes++; + TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES, + "memcg has stats for too many nodes, max is %d", + MAX_NR_NODES); + stats->nodes[ctx->current_node_idx].node = found_node_id; + + ctx->next_handler = memcg_stats_handle_in_node; +} + +static void memcg_stats_handle_in_node(struct memcg_stats *stats, + struct memcg_stats_parse_context *ctx, + char *line) +{ + char *my_line = strdup(line); + struct split_iterator it = { .str = my_line }; + char *gen, *age, *nr_anon, *nr_file; + struct node_stats *node_stats; + struct generation_stats *gen_stats; + char *end; + + TEST_ASSERT(it.str, "failed to copy input line"); + + gen = split_next(&it); + + if (!gen) + goto out_consume; /* Skip empty lines */ + + if (!strcmp("memcg", gen) || !strcmp("node", gen)) { + /* + * Reached next memcg or node section. Don't consume, let the + * other handler deal with this. + */ + ctx->next_handler = memcg_stats_handle_in_memcg; + goto out; + } + + node_stats = &stats->nodes[ctx->current_node_idx]; + TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS, + "found too many generation lines; max is %d", + MAX_NR_GENS); + gen_stats = &node_stats->gens[node_stats->nr_gens++]; + + age = split_next(&it); + nr_anon = split_next(&it); + nr_file = split_next(&it); + + TEST_ASSERT(age && nr_anon && nr_file, + "malformed generation line; not enough tokens"); + + gen_stats->gen = (int)strtol(gen, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen); + + gen_stats->age_ms = strtol(age, &end, 10); + TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age); + + gen_stats->nr_anon = strtol(nr_anon, &end, 10); + TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'", + nr_anon); + + gen_stats->nr_file = strtol(nr_file, &end, 10); + TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file); + +out_consume: + ctx->consumed = true; +out: + free(my_line); +} + +static void print_memcg_stats(const struct memcg_stats *stats, const char *name) +{ + int node, gen; + + pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id); + for (node = 0; node < stats->nr_nodes; ++node) { + pr_debug("\tnode %d\n", stats->nodes[node].node); + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + const struct generation_stats *gstats = + &stats->nodes[node].gens[gen]; + + pr_debug("\t\tgen %d\tage_ms %ld" + "\tnr_anon %ld\tnr_file %ld\n", + gstats->gen, gstats->age_ms, gstats->nr_anon, + gstats->nr_file); + } + } +} + +/* Re-read lru_gen debugfs information for @memcg into @stats. */ +void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg) +{ + FILE *f; + ssize_t read = 0; + char *line = NULL; + size_t bufsz; + struct memcg_stats_parse_context ctx = { + .next_handler = memcg_stats_handle_searching, + .name = memcg, + }; + + memset(stats, 0, sizeof(struct memcg_stats)); + + f = fopen(LRU_GEN_DEBUGFS, "r"); + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + + while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) { + ctx.consumed = false; + + do { + ctx.next_handler(stats, &ctx, line); + if (!ctx.next_handler) + break; + } while (!ctx.consumed); + } + + if (read < 0 && !feof(f)) + TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS); + + TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n" + "Did the memcg get created in the proper mount?", + memcg); + if (line) + free(line); + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); + + print_memcg_stats(stats, memcg); +} + +/* + * Find all pages tracked by lru_gen for this memcg in generation @target_gen. + * + * If @target_gen is negative, look for all generations. + */ +long lru_gen_sum_memcg_stats_for_gen(int target_gen, + const struct memcg_stats *stats) +{ + int node, gen; + long total_nr = 0; + + for (node = 0; node < stats->nr_nodes; ++node) { + const struct node_stats *node_stats = &stats->nodes[node]; + + for (gen = 0; gen < node_stats->nr_gens; ++gen) { + const struct generation_stats *gen_stats = + &node_stats->gens[gen]; + + if (target_gen >= 0 && gen_stats->gen != target_gen) + continue; + + total_nr += gen_stats->nr_anon + gen_stats->nr_file; + } + } + + return total_nr; +} + +/* Find all pages tracked by lru_gen for this memcg. */ +long lru_gen_sum_memcg_stats(const struct memcg_stats *stats) +{ + return lru_gen_sum_memcg_stats_for_gen(-1, stats); +} + +/* + * If lru_gen aging should force page table scanning. + * + * If you want to set this to false, you will need to do eviction + * before doing extra aging passes. + */ +static const bool force_scan = true; + +static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen) +{ + FILE *f = fopen(LRU_GEN_DEBUGFS, "w"); + char *command; + size_t sz; + + TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS); + sz = asprintf(&command, "+ %lu %d %d 1 %d\n", + memcg_id, node_id, max_gen, force_scan); + TEST_ASSERT(sz > 0, "creating aging command failed"); + + pr_debug("Running aging command: %s", command); + if (fwrite(command, sizeof(char), sz, f) < sz) { + TEST_ASSERT(false, "writing aging command %s to %s failed", + command, LRU_GEN_DEBUGFS); + } + + TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS); +} + +void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg) +{ + int node, gen; + + pr_debug("lru_gen: invoking aging...\n"); + + /* Must read memcg stats to construct the proper aging command. */ + lru_gen_read_memcg_stats(stats, memcg); + + for (node = 0; node < stats->nr_nodes; ++node) { + int max_gen = 0; + + for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) { + int this_gen = stats->nodes[node].gens[gen].gen; + + max_gen = max_gen > this_gen ? max_gen : this_gen; + } + + run_aging_impl(stats->memcg_id, stats->nodes[node].node, + max_gen); + } + + /* Re-read so callers get updated information */ + lru_gen_read_memcg_stats(stats, memcg); +} + +/* + * Find which generation contains at least @pages pages, assuming that + * such a generation exists. + */ +int lru_gen_find_generation(const struct memcg_stats *stats, + unsigned long pages) +{ + int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1; + + for (node = 0; node < stats->nr_nodes; ++node) + for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens; + ++gen_idx) { + gen = stats->nodes[node].gens[gen_idx].gen; + max_gen = gen > max_gen ? gen : max_gen; + min_gen = gen < min_gen ? gen : min_gen; + } + + for (gen = min_gen; gen <= max_gen; ++gen) + /* See if this generation has enough pages. */ + if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages) + return gen; + + return -1; +} + +bool lru_gen_usable(void) +{ + long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK; + int lru_gen_fd, lru_gen_debug_fd; + char mglru_feature_str[8] = {}; + long mglru_features; + + lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY); + if (lru_gen_fd < 0) { + puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH); + return false; + } + if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) { + puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH); + close(lru_gen_fd); + return false; + } + close(lru_gen_fd); + + mglru_features = strtol(mglru_feature_str, NULL, 16); + if ((mglru_features & required_features) != required_features) { + printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n", + mglru_features, required_features); + printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n", + required_features); + return false; + } + + lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR); + __TEST_REQUIRE(lru_gen_debug_fd >= 0, + "lru_gen: Could not open " LRU_GEN_DEBUGFS ", " + "but lru_gen is enabled, so cannot use page_idle."); + close(lru_gen_debug_fd); + return true; +} diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..6dcd15910a06 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -16,7 +16,7 @@ struct memstress_args memstress_args; * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static u64 guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; struct vcpu_thread { /* The index of the vCPU. */ @@ -44,15 +44,15 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; * Continuously write to the first 8 bytes of each page in the * specified region. */ -void memstress_guest_code(uint32_t vcpu_idx) +void memstress_guest_code(u32 vcpu_idx) { struct memstress_args *args = &memstress_args; struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx]; struct guest_random_state rand_state; - uint64_t gva; - uint64_t pages; - uint64_t addr; - uint64_t page; + gva_t gva; + u64 pages; + u64 addr; + u64 page; int i; rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); @@ -76,9 +76,9 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); if (__guest_random_bool(&rand_state, args->write_percent)) - *(uint64_t *)addr = 0x0123456789ABCDEF; + *(u64 *)addr = 0x0123456789ABCDEF; else - READ_ONCE(*(uint64_t *)addr); + READ_ONCE(*(u64 *)addr); } GUEST_SYNC(1); @@ -87,7 +87,7 @@ void memstress_guest_code(uint32_t vcpu_idx) void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[], - uint64_t vcpu_memory_bytes, + u64 vcpu_memory_bytes, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; @@ -122,15 +122,15 @@ void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, } struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, - uint64_t vcpu_memory_bytes, int slots, + u64 vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { struct memstress_args *args = &memstress_args; struct kvm_vm *vm; - uint64_t guest_num_pages, slot0_pages = 0; - uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); - uint64_t region_end_gfn; + u64 guest_num_pages, slot0_pages = 0; + u64 backing_src_pagesz = get_backing_src_pagesz(backing_src); + u64 region_end_gfn; int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); @@ -196,18 +196,14 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; args->gpa = align_down(args->gpa, backing_src_pagesz); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - args->gpa = align_down(args->gpa, 1 << 20); -#endif args->size = guest_num_pages * args->guest_page_size; pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", args->gpa, args->gpa + args->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { - uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i; + u64 region_pages = guest_num_pages / slots; + gpa_t region_start = args->gpa + region_pages * args->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, MEMSTRESS_MEM_SLOT_INDEX + i, @@ -236,7 +232,7 @@ void memstress_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) +void memstress_set_write_percent(struct kvm_vm *vm, u32 write_percent) { memstress_args.write_percent = write_percent; sync_global_to_guest(vm, memstress_args.write_percent); @@ -248,7 +244,7 @@ void memstress_set_random_access(struct kvm_vm *vm, bool random_access) sync_global_to_guest(vm, memstress_args.random_access); } -uint64_t __weak memstress_nested_pages(int nr_vcpus) +u64 __weak memstress_nested_pages(int nr_vcpus) { return 0; } @@ -265,7 +261,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); @@ -353,7 +349,7 @@ void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int sl } void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot) + int slots, u64 pages_per_slot) { int i; @@ -364,7 +360,7 @@ void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], } } -unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot) +unsigned long **memstress_alloc_bitmaps(int slots, u64 pages_per_slot) { unsigned long **bitmaps; int i; diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S index aa0abd3f35bb..b787b982e922 100644 --- a/tools/testing/selftests/kvm/lib/riscv/handlers.S +++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S @@ -10,85 +10,88 @@ #include <asm/csr.h> .macro save_context - addi sp, sp, (-8*34) - sd x1, 0(sp) - sd x2, 8(sp) - sd x3, 16(sp) - sd x4, 24(sp) - sd x5, 32(sp) - sd x6, 40(sp) - sd x7, 48(sp) - sd x8, 56(sp) - sd x9, 64(sp) - sd x10, 72(sp) - sd x11, 80(sp) - sd x12, 88(sp) - sd x13, 96(sp) - sd x14, 104(sp) - sd x15, 112(sp) - sd x16, 120(sp) - sd x17, 128(sp) - sd x18, 136(sp) - sd x19, 144(sp) - sd x20, 152(sp) - sd x21, 160(sp) - sd x22, 168(sp) - sd x23, 176(sp) - sd x24, 184(sp) - sd x25, 192(sp) - sd x26, 200(sp) - sd x27, 208(sp) - sd x28, 216(sp) - sd x29, 224(sp) - sd x30, 232(sp) - sd x31, 240(sp) + addi sp, sp, (-8*36) + sd x1, 8(sp) + sd x2, 16(sp) + sd x3, 24(sp) + sd x4, 32(sp) + sd x5, 40(sp) + sd x6, 48(sp) + sd x7, 56(sp) + sd x8, 64(sp) + sd x9, 72(sp) + sd x10, 80(sp) + sd x11, 88(sp) + sd x12, 96(sp) + sd x13, 104(sp) + sd x14, 112(sp) + sd x15, 120(sp) + sd x16, 128(sp) + sd x17, 136(sp) + sd x18, 144(sp) + sd x19, 152(sp) + sd x20, 160(sp) + sd x21, 168(sp) + sd x22, 176(sp) + sd x23, 184(sp) + sd x24, 192(sp) + sd x25, 200(sp) + sd x26, 208(sp) + sd x27, 216(sp) + sd x28, 224(sp) + sd x29, 232(sp) + sd x30, 240(sp) + sd x31, 248(sp) csrr s0, CSR_SEPC csrr s1, CSR_SSTATUS - csrr s2, CSR_SCAUSE - sd s0, 248(sp) + csrr s2, CSR_STVAL + csrr s3, CSR_SCAUSE + sd s0, 0(sp) sd s1, 256(sp) sd s2, 264(sp) + sd s3, 272(sp) .endm .macro restore_context + ld s3, 272(sp) ld s2, 264(sp) ld s1, 256(sp) - ld s0, 248(sp) - csrw CSR_SCAUSE, s2 + ld s0, 0(sp) + csrw CSR_SCAUSE, s3 csrw CSR_SSTATUS, s1 csrw CSR_SEPC, s0 - ld x31, 240(sp) - ld x30, 232(sp) - ld x29, 224(sp) - ld x28, 216(sp) - ld x27, 208(sp) - ld x26, 200(sp) - ld x25, 192(sp) - ld x24, 184(sp) - ld x23, 176(sp) - ld x22, 168(sp) - ld x21, 160(sp) - ld x20, 152(sp) - ld x19, 144(sp) - ld x18, 136(sp) - ld x17, 128(sp) - ld x16, 120(sp) - ld x15, 112(sp) - ld x14, 104(sp) - ld x13, 96(sp) - ld x12, 88(sp) - ld x11, 80(sp) - ld x10, 72(sp) - ld x9, 64(sp) - ld x8, 56(sp) - ld x7, 48(sp) - ld x6, 40(sp) - ld x5, 32(sp) - ld x4, 24(sp) - ld x3, 16(sp) - ld x2, 8(sp) - ld x1, 0(sp) - addi sp, sp, (8*34) + ld x31, 248(sp) + ld x30, 240(sp) + ld x29, 232(sp) + ld x28, 224(sp) + ld x27, 216(sp) + ld x26, 208(sp) + ld x25, 200(sp) + ld x24, 192(sp) + ld x23, 184(sp) + ld x22, 176(sp) + ld x21, 168(sp) + ld x20, 160(sp) + ld x19, 152(sp) + ld x18, 144(sp) + ld x17, 136(sp) + ld x16, 128(sp) + ld x15, 120(sp) + ld x14, 112(sp) + ld x13, 104(sp) + ld x12, 96(sp) + ld x11, 88(sp) + ld x10, 80(sp) + ld x9, 72(sp) + ld x8, 64(sp) + ld x7, 56(sp) + ld x6, 48(sp) + ld x5, 40(sp) + ld x4, 32(sp) + ld x3, 24(sp) + ld x2, 16(sp) + ld x1, 8(sp) + addi sp, sp, (8*36) .endm .balign 4 diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index dd663bcf0cc0..ded5429f3448 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -8,15 +8,16 @@ #include <linux/compiler.h> #include <assert.h> +#include "guest_modes.h" #include "kvm_util.h" #include "processor.h" #include "ucall_common.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 -static vm_vaddr_t exception_handlers; +static gva_t exception_handlers; -bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) +bool __vcpu_has_ext(struct kvm_vcpu *vcpu, u64 ext) { unsigned long value = 0; int ret; @@ -26,41 +27,36 @@ bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext) return !ret && !!value; } -static uint64_t page_align(struct kvm_vm *vm, uint64_t v) -{ - return (v + vm->page_size) & ~(vm->page_size - 1); -} - -static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +static u64 pte_addr(struct kvm_vm *vm, u64 entry) { return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << PGTBL_PAGE_SIZE_SHIFT; } -static uint64_t ptrs_per_pte(struct kvm_vm *vm) +static u64 ptrs_per_pte(struct kvm_vm *vm) { - return PGTBL_PAGE_SIZE / sizeof(uint64_t); + return PGTBL_PAGE_SIZE / sizeof(u64); } -static uint64_t pte_index_mask[] = { +static u64 pte_index_mask[] = { PGTBL_L0_INDEX_MASK, PGTBL_L1_INDEX_MASK, PGTBL_L2_INDEX_MASK, PGTBL_L3_INDEX_MASK, }; -static uint32_t pte_index_shift[] = { +static u32 pte_index_shift[] = { PGTBL_L0_INDEX_SHIFT, PGTBL_L1_INDEX_SHIFT, PGTBL_L2_INDEX_SHIFT, PGTBL_L3_INDEX_SHIFT, }; -static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +static u64 pte_index(struct kvm_vm *vm, gva_t gva, int level) { TEST_ASSERT(level > -1, "Negative page table level (%d) not possible", level); - TEST_ASSERT(level < vm->pgtable_levels, + TEST_ASSERT(level < vm->mmu.pgtable_levels, "Invalid page table level (%d)", level); return (gva & pte_index_mask[level]) >> pte_index_shift[level]; @@ -68,37 +64,36 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + size_t nr_pages = vm_page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - vm->pgd = vm_phy_pages_alloc(vm, nr_pages, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, - vm->memslots[MEM_REGION_PT]); - vm->pgd_created = true; + vm->mmu.pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->mmu.pgd_created = true; } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - uint64_t *ptep, next_ppn; - int level = vm->pgtable_levels - 1; + u64 *ptep, next_ppn; + int level = vm->mmu.pgtable_levels - 1; - TEST_ASSERT((vaddr % vm->page_size) == 0, + TEST_ASSERT((gva % vm->page_size) == 0, "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % vm->page_size) == 0, + " gva: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx vm->page_size: 0x%x", gpa, vm->page_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!*ptep) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | @@ -108,7 +103,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) while (level > -1) { ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + - pte_index(vm, vaddr, level) * 8; + pte_index(vm, gva, level) * 8; if (!*ptep && level > 0) { next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; @@ -118,20 +113,20 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) level--; } - paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; - *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | + gpa = gpa >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (gpa << PGTBL_PTE_ADDR_SHIFT) | PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { - uint64_t *ptep; - int level = vm->pgtable_levels - 1; + u64 *ptep; + int level = vm->mmu.pgtable_levels - 1; - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) goto unmapped_gva; - ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; + ptep = addr_gpa2hva(vm, vm->mmu.pgd) + pte_index(vm, gva, level) * 8; if (!ptep) goto unmapped_gva; level--; @@ -152,12 +147,12 @@ unmapped_gva: exit(1); } -static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t page, int level) +static void pte_dump(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 page, int level) { #ifdef DEBUG static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; - uint64_t pte, *ptep; + u64 pte, *ptep; if (level < 0) return; @@ -174,15 +169,16 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, #endif } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - int level = vm->pgtable_levels - 1; - uint64_t pgd, *ptep; + struct kvm_mmu *mmu = &vm->mmu; + int level = mmu->pgtable_levels - 1; + u64 pgd, *ptep; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; - for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { + for (pgd = mmu->pgd; pgd < mmu->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { ptep = addr_gpa2hva(vm, pgd); if (!*ptep) continue; @@ -197,27 +193,46 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) { struct kvm_vm *vm = vcpu->vm; unsigned long satp; + unsigned long satp_mode; + unsigned long max_satp_mode; /* * The RISC-V Sv48 MMU mode supports 56-bit physical address * for 48-bit virtual address with 4KB last level page size. */ switch (vm->mode) { - case VM_MODE_P52V48_4K: - case VM_MODE_P48V48_4K: - case VM_MODE_P40V48_4K: + case VM_MODE_P56V57_4K: + case VM_MODE_P50V57_4K: + case VM_MODE_P41V57_4K: + satp_mode = SATP_MODE_57; + break; + case VM_MODE_P56V48_4K: + case VM_MODE_P50V48_4K: + case VM_MODE_P41V48_4K: + satp_mode = SATP_MODE_48; + break; + case VM_MODE_P56V39_4K: + case VM_MODE_P50V39_4K: + case VM_MODE_P41V39_4K: + satp_mode = SATP_MODE_39; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; - satp |= SATP_MODE_48; + max_satp_mode = vcpu_get_reg(vcpu, RISCV_CONFIG_REG(satp_mode)); + + if ((satp_mode >> SATP_MODE_SHIFT) > max_satp_mode) + TEST_FAIL("Unable to set satp mode 0x%lx, max mode 0x%lx\n", + satp_mode >> SATP_MODE_SHIFT, max_satp_mode); + + satp = (vm->mmu.pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; + satp |= satp_mode; vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_riscv_core core; @@ -295,20 +310,20 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { int r; size_t stack_size; - unsigned long stack_vaddr; + unsigned long stack_gva; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); @@ -328,7 +343,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); /* Setup stack pointer and program counter of guest */ - vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_gva + stack_size); /* Setup sscratch for guest_get_vcpuid() */ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id); @@ -342,7 +357,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; - uint64_t id = RISCV_CORE_REG(regs.a0); + u64 id = RISCV_CORE_REG(regs.a0); int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" @@ -377,7 +392,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) id = RISCV_CORE_REG(regs.a7); break; } - vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); + vcpu_set_reg(vcpu, id, va_arg(ap, u64)); } va_end(ap); @@ -402,7 +417,7 @@ struct handlers { exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS]; }; -void route_exception(struct ex_regs *regs) +void route_exception(struct pt_regs *regs) { struct handlers *handlers = (struct handlers *)exception_handlers; int vector = 0, ec; @@ -433,10 +448,10 @@ void vcpu_init_vector_tables(struct kvm_vcpu *vcpu) void vm_init_vector_tables(struct kvm_vm *vm) { - vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, MEM_REGION_DATA); + vm->handlers = __vm_alloc(vm, sizeof(struct handlers), vm->page_size, + MEM_REGION_DATA); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; } void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler) @@ -454,7 +469,7 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle handlers->exception_handlers[1][0] = handler; } -uint32_t guest_get_vcpuid(void) +u32 guest_get_vcpuid(void) { return csr_read(CSR_SSCRATCH); } @@ -515,3 +530,43 @@ unsigned long get_host_sbi_spec_version(void) return ret.value; } + +void kvm_selftest_arch_init(void) +{ + /* + * riscv64 doesn't have a true default mode, so start by detecting the + * supported vm mode. + */ + guest_modes_append_default(); +} + +unsigned long riscv64_get_satp_mode(void) +{ + int kvm_fd, vm_fd, vcpu_fd, err; + u64 val; + struct kvm_one_reg reg = { + .id = RISCV_CONFIG_REG(satp_mode), + .addr = (u64)&val, + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL); + TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd)); + + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd)); + + err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); + + return val; +} + +bool kvm_arch_has_default_irqchip(void) +{ + return kvm_check_cap(KVM_CAP_IRQCHIP); +} diff --git a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c index 2c432fa164f1..f5480473f192 100644 --- a/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c @@ -13,7 +13,7 @@ static void guest_code(void) { - uint64_t diag318_info = 0x12345678; + u64 diag318_info = 0x12345678; asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info)); } @@ -23,13 +23,13 @@ static void guest_code(void) * we create an ad-hoc VM here to handle the instruction then extract the * necessary data. It is up to the caller to decide what to do with that data. */ -static uint64_t diag318_handler(void) +static u64 diag318_handler(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; - uint64_t reg; - uint64_t diag318_info; + u64 reg; + u64 diag318_info; vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_run(vcpu); @@ -51,9 +51,9 @@ static uint64_t diag318_handler(void) return diag318_info; } -uint64_t get_diag318_info(void) +u64 get_diag318_info(void) { - static uint64_t diag318_info; + static u64 diag318_info; static bool printed_skip; /* diff --git a/tools/testing/selftests/kvm/lib/s390/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c index d540812d911a..9a778054f07f 100644 --- a/tools/testing/selftests/kvm/lib/s390/facility.c +++ b/tools/testing/selftests/kvm/lib/s390/facility.c @@ -10,5 +10,5 @@ #include "facility.h" -uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +u64 stfl_doublewords[NB_STFL_DOUBLEWORDS]; bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/s390/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c index 20cfe970e3e3..a9adb3782b35 100644 --- a/tools/testing/selftests/kvm/lib/s390/processor.c +++ b/tools/testing/selftests/kvm/lib/s390/processor.c @@ -12,21 +12,21 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { - vm_paddr_t paddr; + gpa_t gpa; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - if (vm->pgd_created) + if (vm->mmu.pgd_created) return; - paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, + gpa = vm_phy_pages_alloc(vm, PAGES_PER_REGION, KVM_GUEST_PAGE_TABLE_MIN_PADDR, vm->memslots[MEM_REGION_PT]); - memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); + memset(addr_gpa2hva(vm, gpa), 0xff, PAGES_PER_REGION * vm->page_size); - vm->pgd = paddr; - vm->pgd_created = true; + vm->mmu.pgd = gpa; + vm->mmu.pgd_created = true; } /* @@ -34,9 +34,9 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) * a page table (ri == 4). Returns a suitable region/segment table entry * which points to the freshly allocated pages. */ -static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) +static u64 virt_alloc_region(struct kvm_vm *vm, int ri) { - uint64_t taddr; + u64 taddr; taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); @@ -47,30 +47,28 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT((gva % vm->page_size) == 0, - "Virtual address not on page boundary,\n" - " vaddr: 0x%lx vm->page_size: 0x%x", - gva, vm->page_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (gva >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - gva); + "Virtual address not on page boundary,\n" + " gva: 0x%lx vm->page_size: 0x%x", + gva, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); TEST_ASSERT((gpa % vm->page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->page_size: 0x%x", gva, vm->page_size); TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", gva, vm->max_gfn, vm->page_size); /* Walk through region and segment tables */ - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; if (entry[idx] & REGION_ENTRY_INVALID) @@ -86,15 +84,15 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) entry[idx] = gpa; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int ri, idx; - uint64_t *entry; + u64 *entry; TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - entry = addr_gpa2hva(vm, vm->pgd); + entry = addr_gpa2hva(vm, vm->mmu.pgd); for (ri = 1; ri <= 4; ri++) { idx = (gva >> (64 - 11 * ri)) & 0x7ffu; TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID), @@ -111,10 +109,10 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return (entry[idx] & ~0xffful) + (gva & 0xffful); } -static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t ptea_start) +static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 ptea_start) { - uint64_t *pte, ptea; + u64 *pte, ptea; for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) { pte = addr_gpa2hva(vm, ptea); @@ -125,10 +123,10 @@ static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, - uint64_t reg_tab_addr) +static void virt_dump_region(FILE *stream, struct kvm_vm *vm, u8 indent, + u64 reg_tab_addr) { - uint64_t addr, *entry; + u64 addr, *entry; for (addr = reg_tab_addr; addr < reg_tab_addr + 0x400 * 8; addr += 8) { entry = addr_gpa2hva(vm, addr); @@ -147,12 +145,12 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - if (!vm->pgd_created) + if (!vm->mmu.pgd_created) return; - virt_dump_region(stream, vm, indent, vm->pgd); + virt_dump_region(stream, vm, indent, vm->mmu.pgd); } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -160,10 +158,10 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu->run->psw_addr = (uintptr_t)guest_code; } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); - uint64_t stack_vaddr; + u64 stack_gva; struct kvm_regs regs; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; @@ -171,20 +169,19 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = __vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); /* Setup guest registers */ vcpu_regs_get(vcpu, ®s); - regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; + regs.gprs[15] = stack_gva + (DEFAULT_STACK_PGS * getpagesize()) - 160; vcpu_regs_set(vcpu, ®s); vcpu_sregs_get(vcpu, &sregs); sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ - sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ + sregs.crs[1] = vm->mmu.pgd | 0xf; /* Primary region table */ vcpu_sregs_set(vcpu, &sregs); vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ @@ -206,13 +203,13 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); for (i = 0; i < num; i++) - regs.gprs[i + 2] = va_arg(ap, uint64_t); + regs.gprs[i + 2] = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); @@ -221,3 +218,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..4d845000de15 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -76,11 +76,11 @@ * the use of a binary-search tree, where each node contains at least * the following members: * - * typedef uint64_t sparsebit_idx_t; - * typedef uint64_t sparsebit_num_t; + * typedef u64 sparsebit_idx_t; + * typedef u64 sparsebit_num_t; * * sparsebit_idx_t idx; - * uint32_t mask; + * u32 mask; * sparsebit_num_t num_after; * * The idx member contains the bit index of the first bit described by this @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -162,7 +162,7 @@ #define DUMP_LINE_MAX 100 /* Does not include indent amount */ -typedef uint32_t mask_t; +typedef u32 mask_t; #define MASK_BITS (sizeof(mask_t) * CHAR_BIT) struct node { @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ @@ -2056,9 +2056,9 @@ unsigned char get8(void) return ch; } -uint64_t get64(void) +u64 get64(void) { - uint64_t x; + u64 x; x = get8(); x = (x << 8) | get8(); @@ -2074,9 +2074,9 @@ int main(void) { s = sparsebit_alloc(); for (;;) { - uint8_t op = get8() & 0xf; - uint64_t first = get64(); - uint64_t last = get64(); + u8 op = get8() & 0xf; + u64 first = get64(); + u64 last = get64(); operate(op, first, last); } diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 8ed0b74ae837..bab1bd2b775b 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -18,20 +18,27 @@ #include "test_util.h" +sigjmp_buf expect_sigbus_jmpbuf; + +void __attribute__((used)) expect_sigbus_handler(int signum) +{ + siglongjmp(expect_sigbus_jmpbuf, 1); +} + /* * Random number generator that is usable from guest code. This is the * Park-Miller LCG using standard constants. */ -struct guest_random_state new_guest_random_state(uint32_t seed) +struct guest_random_state new_guest_random_state(u32 seed) { struct guest_random_state s = {.seed = seed}; return s; } -uint32_t guest_random_u32(struct guest_random_state *state) +u32 guest_random_u32(struct guest_random_state *state) { - state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1); + state->seed = (u64)state->seed * 48271 % ((u32)(1 << 31) - 1); return state->seed; } @@ -76,12 +83,12 @@ size_t parse_size(const char *size) return base << shift; } -int64_t timespec_to_ns(struct timespec ts) +s64 timespec_to_ns(struct timespec ts) { - return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec; + return (s64)ts.tv_nsec + 1000000000LL * (s64)ts.tv_sec; } -struct timespec timespec_add_ns(struct timespec ts, int64_t ns) +struct timespec timespec_add_ns(struct timespec ts, s64 ns) { struct timespec res; @@ -94,15 +101,15 @@ struct timespec timespec_add_ns(struct timespec ts, int64_t ns) struct timespec timespec_add(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 + ns2); } struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) { - int64_t ns1 = timespec_to_ns(ts1); - int64_t ns2 = timespec_to_ns(ts2); + s64 ns1 = timespec_to_ns(ts1); + s64 ns2 = timespec_to_ns(ts2); return timespec_add_ns((struct timespec){0}, ns1 - ns2); } @@ -116,7 +123,7 @@ struct timespec timespec_elapsed(struct timespec start) struct timespec timespec_div(struct timespec ts, int divisor) { - int64_t ns = timespec_to_ns(ts) / divisor; + s64 ns = timespec_to_ns(ts) / divisor; return timespec_add_ns((struct timespec){0}, ns); } @@ -132,37 +139,57 @@ void print_skip(const char *fmt, ...) puts(", skipping test"); } -bool thp_configured(void) +static bool test_sysfs_path(const char *path) { - int ret; struct stat statbuf; + int ret; - ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + ret = stat(path, &statbuf); TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), - "Error in stating /sys/kernel/mm/transparent_hugepage"); + "Error in stat()ing '%s'", path); return ret == 0; } -size_t get_trans_hugepagesz(void) +bool thp_configured(void) +{ + return test_sysfs_path("/sys/kernel/mm/transparent_hugepage"); +} + +static size_t get_sysfs_val(const char *path) { size_t size; FILE *f; int ret; - TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); - - f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); - TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); + f = fopen(path, "r"); + TEST_ASSERT(f, "Error opening '%s'", path); ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret > 0, "Error reading '%s'", path); + + /* Re-scan the input stream to verify the entire file was read. */ ret = fscanf(f, "%ld", &size); - TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size"); - fclose(f); + TEST_ASSERT(ret < 1, "Error reading '%s'", path); + fclose(f); return size; } +size_t get_trans_hugepagesz(void) +{ + TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); + + return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"); +} + +bool is_numa_balancing_enabled(void) +{ + if (!test_sysfs_path("/proc/sys/kernel/numa_balancing")) + return false; + return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1; +} + size_t get_def_hugetlb_pagesz(void) { char buf[64]; @@ -198,7 +225,7 @@ size_t get_def_hugetlb_pagesz(void) #define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) #define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) -const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) +const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(u32 i) { static const struct vm_mem_backing_src_alias aliases[] = { [VM_MEM_SRC_ANONYMOUS] = { @@ -290,9 +317,9 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) #define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK)) -size_t get_backing_src_pagesz(uint32_t i) +size_t get_backing_src_pagesz(u32 i) { - uint32_t flag = vm_mem_backing_src_alias(i)->flag; + u32 flag = vm_mem_backing_src_alias(i)->flag; switch (i) { case VM_MEM_SRC_ANONYMOUS: @@ -308,7 +335,7 @@ size_t get_backing_src_pagesz(uint32_t i) } } -bool is_backing_src_hugetlb(uint32_t i) +bool is_backing_src_hugetlb(u32 i) { return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); } diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 42151e571953..029ce21f9f2f 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -14,7 +14,7 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; -int ucall_nr_pages_required(uint64_t page_size) +int ucall_nr_pages_required(u64 page_size) { return align_up(sizeof(struct ucall_header), page_size) / page_size; } @@ -25,16 +25,16 @@ int ucall_nr_pages_required(uint64_t page_size) */ static struct ucall_header *ucall_pool; -void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +void ucall_init(struct kvm_vm *vm, gpa_t mmio_gpa) { struct ucall_header *hdr; struct ucall *uc; - vm_vaddr_t vaddr; + gva_t gva; int i; - vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, - MEM_REGION_DATA); - hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr); + gva = vm_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, + MEM_REGION_DATA); + hdr = (struct ucall_header *)addr_gva2hva(vm, gva); memset(hdr, 0, sizeof(*hdr)); for (i = 0; i < KVM_MAX_VCPUS; ++i) { @@ -42,7 +42,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) uc->hva = uc; } - write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr); + write_guest_global(vm, ucall_pool, (struct ucall_header *)gva); ucall_arch_init(vm, mmio_gpa); } @@ -79,7 +79,7 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } -void ucall_assert(uint64_t cmd, const char *exp, const char *file, +void ucall_assert(u64 cmd, const char *exp, const char *file, unsigned int line, const char *fmt, ...) { struct ucall *uc; @@ -88,20 +88,20 @@ void ucall_assert(uint64_t cmd, const char *exp, const char *file, uc = ucall_alloc(); uc->cmd = cmd; - WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); - WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (u64)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (u64)(file)); WRITE_ONCE(uc->args[GUEST_LINE], line); va_start(va, fmt); guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -void ucall_fmt(uint64_t cmd, const char *fmt, ...) +void ucall_fmt(u64 cmd, const char *fmt, ...) { struct ucall *uc; va_list va; @@ -113,12 +113,12 @@ void ucall_fmt(uint64_t cmd, const char *fmt, ...) guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -void ucall(uint64_t cmd, int nargs, ...) +void ucall(u64 cmd, int nargs, ...) { struct ucall *uc; va_list va; @@ -132,15 +132,15 @@ void ucall(uint64_t cmd, int nargs, ...) va_start(va, nargs); for (i = 0; i < nargs; ++i) - WRITE_ONCE(uc->args[i], va_arg(va, uint64_t)); + WRITE_ONCE(uc->args[i], va_arg(va, u64)); va_end(va); - ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + ucall_arch_do_ucall((gva_t)uc->hva); ucall_free(uc); } -uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) +u64 get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { struct ucall ucall; void *addr; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 7c9de8414462..ef8d76f71f83 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -27,7 +27,7 @@ static void *uffd_handler_thread_fn(void *arg) { struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; int uffd = reader_args->uffd; - int64_t pages = 0; + s64 pages = 0; struct timespec start; struct timespec ts_diff; struct epoll_event evt; @@ -100,8 +100,8 @@ static void *uffd_handler_thread_fn(void *arg) } struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, - void *hva, uint64_t len, - uint64_t num_readers, + void *hva, u64 len, + u64 num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -109,12 +109,12 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, int uffd; struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + u64 expected_ioctls = ((u64)1) << _UFFDIO_COPY; int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); @@ -132,7 +132,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, /* In order to get minor faults, prefault via the alias. */ if (is_minor) - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + expected_ioctls = ((u64)1) << _UFFDIO_CONTINUE; uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); @@ -141,9 +141,9 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, uffdio_api.features = 0; TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, "ioctl UFFDIO_API failed: %" PRIu64, - (uint64_t)uffdio_api.api); + (u64)uffdio_api.api); - uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.start = (u64)hva; uffdio_register.range.len = len; uffdio_register.mode = uffd_mode; TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, diff --git a/tools/testing/selftests/kvm/lib/x86/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c index 89153a333e83..5182fd0d6a76 100644 --- a/tools/testing/selftests/kvm/lib/x86/apic.c +++ b/tools/testing/selftests/kvm/lib/x86/apic.c @@ -14,7 +14,7 @@ void apic_disable(void) void xapic_enable(void) { - uint64_t val = rdmsr(MSR_IA32_APICBASE); + u64 val = rdmsr(MSR_IA32_APICBASE); /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */ if (val & MSR_IA32_APICBASE_EXTD) { diff --git a/tools/testing/selftests/kvm/lib/x86/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c index 15bc8cd583aa..d200c5c26e2e 100644 --- a/tools/testing/selftests/kvm/lib/x86/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c @@ -76,23 +76,23 @@ bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) } struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, - vm_vaddr_t *p_hv_pages_gva) + gva_t *p_hv_pages_gva) { - vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm); + gva_t hv_pages_gva = vm_alloc_page(vm); struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva); /* Setup of a region of guest memory for the VP Assist page. */ - hv->vp_assist = (void *)vm_vaddr_alloc_page(vm); + hv->vp_assist = (void *)vm_alloc_page(vm); hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist); hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist); /* Setup of a region of guest memory for the partition assist page. */ - hv->partition_assist = (void *)vm_vaddr_alloc_page(vm); + hv->partition_assist = (void *)vm_alloc_page(vm); hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist); hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist); /* Setup of a region of guest memory for the enlightened VMCS. */ - hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm); + hv->enlightened_vmcs = (void *)vm_alloc_page(vm); hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs); hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs); @@ -100,9 +100,9 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, return hv; } -int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +int enable_vp_assist(u64 vp_assist_pa, void *vp_assist) { - uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 7f5d62a65c68..61cf952cd2dc 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -13,9 +13,10 @@ #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "svm_util.h" #include "vmx.h" -void memstress_l2_guest_code(uint64_t vcpu_id) +void memstress_l2_guest_code(u64 vcpu_id) { memstress_guest_code(vcpu_id); vmcall(); @@ -29,9 +30,10 @@ __asm__( " ud2;" ); -static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) -{ #define L2_GUEST_STACK_SIZE 64 + +static void l1_vmx_code(struct vmx_pages *vmx, u64 vcpu_id) +{ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; unsigned long *rsp; @@ -45,11 +47,35 @@ static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) prepare_vmcs(vmx, memstress_l2_guest_entry, rsp); GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); GUEST_DONE(); } -uint64_t memstress_nested_pages(int nr_vcpus) +static void l1_svm_code(struct svm_test_data *svm, u64 vcpu_id) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + unsigned long *rsp; + + + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; + *rsp = vcpu_id; + generic_svm_setup(svm, memstress_l2_guest_entry, rsp); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + + +static void memstress_l1_guest_code(void *data, u64 vcpu_id) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data, vcpu_id); + else + l1_svm_code(data, vcpu_id); +} + +u64 memstress_nested_pages(int nr_vcpus) { /* * 513 page tables is enough to identity-map 256 TiB of L2 with 1G @@ -59,46 +85,37 @@ uint64_t memstress_nested_pages(int nr_vcpus) return 513 + 10 * nr_vcpus; } -void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +static void memstress_setup_ept_mappings(struct kvm_vm *vm) { - uint64_t start, end; - - prepare_eptp(vmx, vm, 0); + u64 start, end; /* * Identity map the first 4G and the test region with 1G pages so that * KVM can shadow the EPT12 with the maximum huge page size supported * by the backing source. */ - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + tdp_identity_map_1g(vm, 0, 0x100000000ULL); start = align_down(memstress_args.gpa, PG_SIZE_1G); end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); - nested_identity_map_1g(vmx, vm, start, end - start); + tdp_identity_map_1g(vm, start, end - start); } void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { - struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; - vm_vaddr_t vmx_gva; + gva_t nested_gva; int vcpu_id; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - TEST_REQUIRE(kvm_cpu_has_ept()); + TEST_REQUIRE(kvm_cpu_has_tdp()); + vm_enable_tdp(vm); + memstress_setup_ept_mappings(vm); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - vmx = vcpu_alloc_vmx(vm, &vmx_gva); - - if (vcpu_id == 0) { - memstress_setup_ept(vmx, vm); - vmx0 = vmx; - } else { - /* Share the same EPT table across all vCPUs. */ - vmx->eptp = vmx0->eptp; - vmx->eptp_hva = vmx0->eptp_hva; - vmx->eptp_gpa = vmx0->eptp_gpa; - } + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); /* * Override the vCPU to run memstress_l1_guest_code() which will @@ -107,6 +124,6 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc vcpu_regs_get(vcpus[vcpu_id], ®s); regs.rip = (unsigned long) memstress_l1_guest_code; vcpu_regs_set(vcpus[vcpu_id], ®s); - vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); + vcpu_args_set(vcpus[vcpu_id], 2, nested_gva, vcpu_id); } } diff --git a/tools/testing/selftests/kvm/lib/x86/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c index f31f0427c17c..0851b74b4e46 100644 --- a/tools/testing/selftests/kvm/lib/x86/pmu.c +++ b/tools/testing/selftests/kvm/lib/x86/pmu.c @@ -8,9 +8,10 @@ #include <linux/kernel.h> #include "kvm_util.h" +#include "processor.h" #include "pmu.h" -const uint64_t intel_pmu_arch_events[] = { +const u64 intel_pmu_arch_events[] = { INTEL_ARCH_CPU_CYCLES, INTEL_ARCH_INSTRUCTIONS_RETIRED, INTEL_ARCH_REFERENCE_CYCLES, @@ -19,13 +20,61 @@ const uint64_t intel_pmu_arch_events[] = { INTEL_ARCH_BRANCHES_RETIRED, INTEL_ARCH_BRANCHES_MISPREDICTED, INTEL_ARCH_TOPDOWN_SLOTS, + INTEL_ARCH_TOPDOWN_BE_BOUND, + INTEL_ARCH_TOPDOWN_BAD_SPEC, + INTEL_ARCH_TOPDOWN_FE_BOUND, + INTEL_ARCH_TOPDOWN_RETIRING, + INTEL_ARCH_LBR_INSERTS, }; kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS); -const uint64_t amd_pmu_zen_events[] = { +const u64 amd_pmu_zen_events[] = { AMD_ZEN_CORE_CYCLES, AMD_ZEN_INSTRUCTIONS_RETIRED, AMD_ZEN_BRANCHES_RETIRED, AMD_ZEN_BRANCHES_MISPREDICTED, }; kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS); + +/* + * For Intel Atom CPUs, the PMU events "Instruction Retired" or + * "Branch Instruction Retired" may be overcounted for some certain + * instructions, like FAR CALL/JMP, RETF, IRET, VMENTRY/VMEXIT/VMPTRLD + * and complex SGX/SMX/CSTATE instructions/flows. + * + * The detailed information can be found in the errata (section SRF7): + * https://edc.intel.com/content/www/us/en/design/products-and-solutions/processors-and-chipsets/sierra-forest/xeon-6700-series-processor-with-e-cores-specification-update/errata-details/ + * + * For the Atom platforms before Sierra Forest (including Sierra Forest), + * Both 2 events "Instruction Retired" and "Branch Instruction Retired" would + * be overcounted on these certain instructions, but for Clearwater Forest + * only "Instruction Retired" event is overcounted on these instructions. + */ +static u64 get_pmu_errata(void) +{ + if (!this_cpu_is_intel()) + return 0; + + if (this_cpu_family() != 0x6) + return 0; + + switch (this_cpu_model()) { + case 0xDD: /* Clearwater Forest */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT); + case 0xAF: /* Sierra Forest */ + case 0x4D: /* Avaton, Rangely */ + case 0x5F: /* Denverton */ + case 0x86: /* Jacobsville */ + return BIT_ULL(INSTRUCTIONS_RETIRED_OVERCOUNT) | + BIT_ULL(BRANCHES_RETIRED_OVERCOUNT); + default: + return 0; + } +} + +u64 pmu_errata_mask; + +void kvm_init_pmu_errata(void) +{ + pmu_errata_mask = get_pmu_errata(); +} diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index bd5a802fa7a5..b51467d70f6e 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -6,8 +6,12 @@ #include "linux/bitmap.h" #include "test_util.h" #include "kvm_util.h" +#include "pmu.h" #include "processor.h" +#include "smm.h" +#include "svm_util.h" #include "sev.h" +#include "vmx.h" #ifndef NUM_INTERRUPTS #define NUM_INTERRUPTS 256 @@ -17,13 +21,48 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -vm_vaddr_t exception_handlers; +gva_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; +bool host_cpu_is_hygon; +bool host_cpu_is_amd_compatible; bool is_forced_emulation_enabled; -uint64_t guest_tsc_khz; +u64 guest_tsc_khz; + +const char *ex_str(int vector) +{ + switch (vector) { +#define VEC_STR(v) case v##_VECTOR: return "#" #v + case DE_VECTOR: return "no exception"; + case KVM_MAGIC_DE_VECTOR: return "#DE"; + VEC_STR(DB); + VEC_STR(NMI); + VEC_STR(BP); + VEC_STR(OF); + VEC_STR(BR); + VEC_STR(UD); + VEC_STR(NM); + VEC_STR(DF); + VEC_STR(TS); + VEC_STR(NP); + VEC_STR(SS); + VEC_STR(GP); + VEC_STR(PF); + VEC_STR(MF); + VEC_STR(AC); + VEC_STR(MC); + VEC_STR(XM); + VEC_STR(VE); + VEC_STR(CP); + VEC_STR(HV); + VEC_STR(VC); + VEC_STR(SX); + default: return "#??"; +#undef VEC_STR + } +} -static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) +static void regs_dump(FILE *stream, struct kvm_regs *regs, u8 indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " "rcx: 0x%.16llx rdx: 0x%.16llx\n", @@ -47,7 +86,7 @@ static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) } static void segment_dump(FILE *stream, struct kvm_segment *segment, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " "selector: 0x%.4x type: 0x%.2x\n", @@ -64,7 +103,7 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment, } static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, - uint8_t indent) + u8 indent) { fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " "padding: 0x%.4x 0x%.4x 0x%.4x\n", @@ -72,7 +111,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, dtable->padding[0], dtable->padding[1], dtable->padding[2]); } -static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) +static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, u8 indent) { unsigned int i; @@ -122,47 +161,83 @@ bool kvm_is_tdp_enabled(void) return get_kvm_amd_param_bool("npt"); } +static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu, + struct pte_masks *pte_masks) +{ + /* If needed, create the top-level page table. */ + if (!mmu->pgd_created) { + mmu->pgd = vm_alloc_page_table(vm); + mmu->pgd_created = true; + mmu->arch.pte_masks = *pte_masks; + } + + TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5, + "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging", + mmu->pgtable_levels); +} + void virt_arch_pgd_alloc(struct kvm_vm *vm) { - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); + + struct pte_masks pte_masks = (struct pte_masks){ + .present = BIT_ULL(0), + .writable = BIT_ULL(1), + .user = BIT_ULL(2), + .accessed = BIT_ULL(5), + .dirty = BIT_ULL(6), + .huge = BIT_ULL(7), + .nx = BIT_ULL(63), + .executable = 0, + .c = vm->arch.c_bit, + .s = vm->arch.s_bit, + }; - /* If needed, create page map l4 table. */ - if (!vm->pgd_created) { - vm->pgd = vm_alloc_page_table(vm); - vm->pgd_created = true; - } + virt_mmu_init(vm, &vm->mmu, &pte_masks); } -static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level) +void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels, + struct pte_masks *pte_masks) { - uint64_t pt_gpa = PTE_GET_PA(*parent_pte); - uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); - int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized"); - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + vm->stage2_mmu.pgtable_levels = pgtable_levels; + virt_mmu_init(vm, &vm->stage2_mmu, pte_masks); +} + +static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu, + u64 *parent_pte, gva_t gva, int level) +{ + u64 pt_gpa = PTE_GET_PA(*parent_pte); + u64 *page_table = addr_gpa2hva(vm, pt_gpa); + int index = (gva >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + + TEST_ASSERT((*parent_pte == mmu->pgd) || is_present_pte(mmu, parent_pte), "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", - level + 1, vaddr); + level + 1, gva); return &page_table[index]; } -static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, - uint64_t *parent_pte, - uint64_t vaddr, - uint64_t paddr, - int current_level, - int target_level) +static u64 *virt_create_upper_pte(struct kvm_vm *vm, + struct kvm_mmu *mmu, + u64 *parent_pte, + gva_t gva, + gpa_t gpa, + int current_level, + int target_level) { - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + u64 *pte = virt_get_pte(vm, mmu, parent_pte, gva, current_level); - paddr = vm_untag_gpa(vm, paddr); + gpa = vm_untag_gpa(vm, gpa); - if (!(*pte & PTE_PRESENT_MASK)) { - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (!is_present_pte(mmu, pte)) { + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu); if (current_level == target_level) - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte |= PTE_HUGE_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { @@ -172,81 +247,84 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. */ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx", - current_level, vaddr); - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx", - current_level, vaddr); + "Cannot create hugepage at level: %u, gva: 0x%lx", + current_level, gva); + TEST_ASSERT(!is_huge_pte(mmu, pte), + "Cannot create page table at level: %u, gva: 0x%lx", + current_level, gva); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) +void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, gva_t gva, + gpa_t gpa, int level) { - const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pml4e, *pdpe, *pde; - uint64_t *pte; + const u64 pg_size = PG_LEVEL_SIZE(level); + u64 *pte = &mmu->pgd; + int current_level; - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, - "Unknown or unsupported guest mode, mode: 0x%x", vm->mode); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); - TEST_ASSERT((vaddr % pg_size) == 0, + TEST_ASSERT((gva % pg_size) == 0, "Virtual address not aligned,\n" - "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", vaddr); - TEST_ASSERT((paddr % pg_size) == 0, + "gva: 0x%lx page size: 0x%lx", gva, pg_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); + TEST_ASSERT((gpa % pg_size) == 0, "Physical address not aligned,\n" - " paddr: 0x%lx page size: 0x%lx", paddr, pg_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + " gpa: 0x%lx page size: 0x%lx", gpa, pg_size); + TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn, "Physical address beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, - "Unexpected bits in paddr: %lx", paddr); + " gpa: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + gpa, vm->max_gfn, vm->page_size); + TEST_ASSERT(vm_untag_gpa(vm, gpa) == gpa, + "Unexpected bits in gpa: %lx", gpa); + + TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu), + "X and NX bit masks cannot be used simultaneously"); /* * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. */ - pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level); - if (*pml4e & PTE_LARGE_MASK) - return; - - pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level); - if (*pdpe & PTE_LARGE_MASK) - return; - - pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level); - if (*pde & PTE_LARGE_MASK) - return; + for (current_level = mmu->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_create_upper_pte(vm, mmu, pte, gva, gpa, + current_level, level); + if (is_huge_pte(mmu, pte)) + return; + } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + pte = virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); + TEST_ASSERT(!is_present_pte(mmu, pte), + "PTE already present for 4k page at gva: 0x%lx", gva); + *pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) | + PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) | + PTE_ALWAYS_SET_MASK(mmu) | (gpa & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final * leaf PTE needs manually set the C/S-bit. */ - if (vm_is_gpa_protected(vm, paddr)) - *pte |= vm->arch.c_bit; + if (vm_is_gpa_protected(vm, gpa)) + *pte |= PTE_C_BIT_MASK(mmu); else - *pte |= vm->arch.s_bit; + *pte |= PTE_S_BIT_MASK(mmu); } -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, gva_t gva, gpa_t gpa) { - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, &vm->mmu, gva, gpa, PG_LEVEL_4K); } -void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint64_t nr_bytes, int level) +void virt_map_level(struct kvm_vm *vm, gva_t gva, gpa_t gpa, + u64 nr_bytes, int level) { - uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t nr_pages = nr_bytes / pg_size; + u64 pg_size = PG_LEVEL_SIZE(level); + u64 nr_pages = nr_bytes / pg_size; int i; TEST_ASSERT(nr_bytes % pg_size == 0, @@ -254,16 +332,19 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level); + __virt_pg_map(vm, &vm->mmu, gva, gpa, level); + sparsebit_set_num(vm->vpages_mapped, gva >> vm->page_shift, + nr_bytes / PAGE_SIZE); - vaddr += pg_size; - paddr += pg_size; + gva += pg_size; + gpa += pg_size; } } -static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +static bool vm_is_target_pte(struct kvm_mmu *mmu, u64 *pte, + int *level, int current_level) { - if (*pte & PTE_LARGE_MASK) { + if (is_huge_pte(mmu, pte)) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, "Unexpected hugepage at level %d", current_level); @@ -273,60 +354,65 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) return *level == current_level; } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level) +static u64 *__vm_get_page_table_entry(struct kvm_vm *vm, + struct kvm_mmu *mmu, + gva_t gva, + int *level) { - uint64_t *pml4e, *pdpe, *pde; + int va_width = 12 + (mmu->pgtable_levels) * 9; + u64 *pte = &mmu->pgd; + int current_level; TEST_ASSERT(!vm->arch.is_pt_protected, "Walking page tables of protected guests is impossible"); - TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM, + TEST_ASSERT(*level >= PG_LEVEL_NONE && *level <= mmu->pgtable_levels, "Invalid PG_LEVEL_* '%d'", *level); - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, - (vaddr >> vm->page_shift)), - "Invalid virtual address, vaddr: 0x%lx", - vaddr); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (gva >> vm->page_shift)), + "Invalid virtual address, gva: 0x%lx", gva); /* - * Based on the mode check above there are 48 bits in the vaddr, so - * shift 16 to sign extend the last bit (bit-47), + * Check that the gva is a sign-extended va_width value. */ - TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16), - "Canonical check failed. The virtual address is invalid."); - - pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G); - if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G)) - return pml4e; + TEST_ASSERT(gva == (((s64)gva << (64 - va_width) >> (64 - va_width))), + "Canonical check failed. The virtual address is invalid."); + + for (current_level = mmu->pgtable_levels; + current_level > PG_LEVEL_4K; + current_level--) { + pte = virt_get_pte(vm, mmu, pte, gva, current_level); + if (vm_is_target_pte(mmu, pte, level, current_level)) + return pte; + } - pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G); - if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G)) - return pdpe; + return virt_get_pte(vm, mmu, pte, gva, PG_LEVEL_4K); +} - pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M); - if (vm_is_target_pte(pde, level, PG_LEVEL_2M)) - return pde; +u64 *tdp_get_pte(struct kvm_vm *vm, u64 l2_gpa) +{ + int level = PG_LEVEL_4K; - return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); + return __vm_get_page_table_entry(vm, &vm->stage2_mmu, l2_gpa, &level); } -uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) +u64 *vm_get_pte(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_4K; - return __vm_get_page_table_entry(vm, vaddr, &level); + return __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); } -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, u8 indent) { - uint64_t *pml4e, *pml4e_start; - uint64_t *pdpe, *pdpe_start; - uint64_t *pde, *pde_start; - uint64_t *pte, *pte_start; + struct kvm_mmu *mmu = &vm->mmu; + u64 *pml4e, *pml4e_start; + u64 *pdpe, *pdpe_start; + u64 *pde, *pde_start; + u64 *pte, *pte_start; - if (!vm->pgd_created) + if (!mmu->pgd_created) return; fprintf(stream, "%*s " @@ -334,47 +420,47 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*s index hvaddr gpaddr " "addr w exec dirty\n", indent, ""); - pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); - for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e_start = (u64 *)addr_gpa2hva(vm, mmu->pgd); + for (u16 n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; - if (!(*pml4e & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pml4e)) continue; fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " " %u\n", indent, "", pml4e - pml4e_start, pml4e, addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); + is_writable_pte(mmu, pml4e), is_nx_pte(mmu, pml4e)); pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); - for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + for (u16 n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; - if (!(*pdpe & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pdpe)) continue; fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " "%u %u\n", indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), - !!(*pdpe & PTE_NX_MASK)); + PTE_GET_PFN(*pdpe), is_writable_pte(mmu, pdpe), + is_nx_pte(mmu, pdpe)); pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); - for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + for (u16 n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; - if (!(*pde & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pde)) continue; fprintf(stream, "%*spde 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), - !!(*pde & PTE_NX_MASK)); + PTE_GET_PFN(*pde), is_writable_pte(mmu, pde), + is_nx_pte(mmu, pde)); pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); - for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + for (u16 n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; - if (!(*pte & PTE_PRESENT_MASK)) + if (!is_present_pte(mmu, pte)) continue; fprintf(stream, "%*spte 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u " @@ -383,19 +469,83 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte - pte_start, pte, addr_hva2gpa(vm, pte), PTE_GET_PFN(*pte), - !!(*pte & PTE_WRITABLE_MASK), - !!(*pte & PTE_NX_MASK), - !!(*pte & PTE_DIRTY_MASK), - ((uint64_t) n1 << 27) - | ((uint64_t) n2 << 18) - | ((uint64_t) n3 << 9) - | ((uint64_t) n4)); + is_writable_pte(mmu, pte), + is_nx_pte(mmu, pte), + is_dirty_pte(mmu, pte), + ((u64)n1 << 27) + | ((u64)n2 << 18) + | ((u64)n3 << 9) + | ((u64)n4)); } } } } } +void vm_enable_tdp(struct kvm_vm *vm) +{ + if (kvm_cpu_has(X86_FEATURE_VMX)) + vm_enable_ept(vm); + else + vm_enable_npt(vm); +} + +bool kvm_cpu_has_tdp(void) +{ + return kvm_cpu_has_ept() || kvm_cpu_has_npt(); +} + +void __tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size, int level) +{ + size_t page_size = PG_LEVEL_SIZE(level); + size_t npages = size / page_size; + + TEST_ASSERT(l2_gpa + size > l2_gpa, "L2 GPA overflow"); + TEST_ASSERT(gpa + size > gpa, "GPA overflow"); + + while (npages--) { + __virt_pg_map(vm, &vm->stage2_mmu, l2_gpa, gpa, level); + l2_gpa += page_size; + gpa += page_size; + } +} + +void tdp_map(struct kvm_vm *vm, gpa_t l2_gpa, gpa_t gpa, u64 size) +{ + __tdp_map(vm, l2_gpa, gpa, size, PG_LEVEL_4K); +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void tdp_identity_map_default_memslots(struct kvm_vm *vm) +{ + u32 s, memslot = 0; + sparsebit_idx_t i, last; + struct userspace_mem_region *region = memslot2region(vm, memslot); + + /* Only memslot 0 is mapped here, ensure it's the only one being used */ + for (s = 0; s < NR_MEM_REGIONS; s++) + TEST_ASSERT_EQ(vm->memslots[s], 0); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + tdp_map(vm, (u64)i << vm->page_shift, + (u64)i << vm->page_shift, 1 << vm->page_shift); + } +} + +/* Identity map a region with 1GiB Pages. */ +void tdp_identity_map_1g(struct kvm_vm *vm, u64 addr, u64 size) +{ + __tdp_map(vm, addr, addr, size, PG_LEVEL_1G); +} + /* * Set Unusable Segment * @@ -463,12 +613,12 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) segp->present = true; } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +gpa_t addr_arch_gva2gpa(struct kvm_vm *vm, gva_t gva) { int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); + u64 *pte = __vm_get_page_table_entry(vm, &vm->mmu, gva, &level); - TEST_ASSERT(*pte & PTE_PRESENT_MASK, + TEST_ASSERT(is_present_pte(&vm->mmu, pte), "Leaf PTE not PRESENT for gva: 0x%08lx", gva); /* @@ -478,7 +628,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) +static void kvm_seg_set_tss_64bit(gva_t base, struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); segp->base = base; @@ -492,7 +642,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; - TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K, + "Unknown or unsupported guest mode: 0x%x", vm->mode); /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); @@ -506,6 +657,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; if (kvm_cpu_has(X86_FEATURE_XSAVE)) sregs.cr4 |= X86_CR4_OSXSAVE; + if (vm->mmu.pgtable_levels == 5) + sregs.cr4 |= X86_CR4_LA57; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -515,7 +668,7 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) kvm_seg_set_kernel_data_64bit(&sregs.gs); kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); - sregs.cr3 = vm->pgd; + sregs.cr3 = vm->mmu.pgd; vcpu_sregs_set(vcpu, &sregs); } @@ -557,7 +710,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return false; if (regs->vector == DE_VECTOR) - return false; + regs->vector = KVM_MAGIC_DE_VECTOR; regs->rip = regs->r11; regs->r9 = regs->vector; @@ -588,16 +741,16 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) struct kvm_segment seg; int i; - vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.gdt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + *(gva_t *)addr_gva2hva(vm, (gva_t)(&exception_handlers)) = vm->handlers; kvm_seg_set_kernel_code_64bit(&seg); kvm_seg_fill_gdt_64bit(vm, &seg); @@ -612,9 +765,9 @@ static void vm_init_descriptor_tables(struct kvm_vm *vm) void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)) { - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + gva_t *handlers = (gva_t *)addr_gva2hva(vm, vm->handlers); - handlers[vector] = (vm_vaddr_t)handler; + handlers[vector] = (gva_t)handler; } void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) @@ -625,7 +778,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) REPORT_GUEST_ASSERT(uc); } -void kvm_arch_vm_post_create(struct kvm_vm *vm) +void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus) { int r; @@ -637,9 +790,12 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm) sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, host_cpu_is_hygon); + sync_global_to_guest(vm, host_cpu_is_amd_compatible); sync_global_to_guest(vm, is_forced_emulation_enabled); + sync_global_to_guest(vm, pmu_errata_mask); - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { struct kvm_sev_init init = { 0 }; vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); @@ -660,18 +816,17 @@ void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) vcpu_regs_set(vcpu, ®s); } -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_mp_state mp_state; struct kvm_regs regs; - vm_vaddr_t stack_vaddr; + gva_t stack_gva; struct kvm_vcpu *vcpu; - stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN, - MEM_REGION_DATA); + stack_gva = __vm_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA); - stack_vaddr += DEFAULT_STACK_PGS * getpagesize(); + stack_gva += DEFAULT_STACK_PGS * getpagesize(); /* * Align stack to match calling sequence requirements in section "The @@ -682,9 +837,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) * If this code is ever used to launch a vCPU with 32-bit entry point it * may need to subtract 4 bytes instead of 8 bytes. */ - TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE), - "__vm_vaddr_alloc() did not provide a page-aligned address"); - stack_vaddr -= 8; + TEST_ASSERT(IS_ALIGNED(stack_gva, PAGE_SIZE), + "__vm_alloc() did not provide a page-aligned address"); + stack_gva -= 8; vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -694,7 +849,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; - regs.rsp = stack_vaddr; + regs.rsp = stack_gva; vcpu_regs_set(vcpu, ®s); /* Setup the MP state */ @@ -711,7 +866,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) return vcpu; } -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, u32 vcpu_id) { struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); @@ -746,9 +901,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return kvm_supported_cpuid; } -static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index, - uint8_t reg, uint8_t lo, uint8_t hi) +static u32 __kvm_cpu_has(const struct kvm_cpuid2 *cpuid, + u32 function, u32 index, + u8 reg, u8 lo, u8 hi) { const struct kvm_cpuid_entry2 *entry; int i; @@ -775,14 +930,14 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, feature.reg, feature.bit, feature.bit); } -uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, - struct kvm_x86_cpu_property property) +u32 kvm_cpuid_property(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_property property) { return __kvm_cpu_has(cpuid, property.function, property.index, property.reg, property.lo_bit, property.hi_bit); } -uint64_t kvm_get_feature_msr(uint64_t msr_index) +u64 kvm_get_feature_msr(u64 msr_index) { struct { struct kvm_msrs header; @@ -801,7 +956,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) return buffer.entry.data; } -void __vm_xsave_require_permission(uint64_t xfeature, const char *name) +void __vm_xsave_require_permission(u64 xfeature, const char *name) { int kvm_fd; u64 bitmask; @@ -858,7 +1013,7 @@ void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, - uint32_t value) + u32 value) { struct kvm_cpuid_entry2 *entry; @@ -873,7 +1028,7 @@ void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value); } -void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, u32 function) { struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); @@ -902,7 +1057,7 @@ void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, vcpu_set_cpuid(vcpu); } -uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) +u64 vcpu_get_msr(struct kvm_vcpu *vcpu, u64 msr_index) { struct { struct kvm_msrs header; @@ -917,7 +1072,7 @@ uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) return buffer.entry.data; } -int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) +int _vcpu_set_msr(struct kvm_vcpu *vcpu, u64 msr_index, u64 msr_value) { struct { struct kvm_msrs header; @@ -945,28 +1100,28 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) vcpu_regs_get(vcpu, ®s); if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); + regs.rdi = va_arg(ap, u64); if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); + regs.rsi = va_arg(ap, u64); if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); + regs.rdx = va_arg(ap, u64); if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); + regs.rcx = va_arg(ap, u64); if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); + regs.r8 = va_arg(ap, u64); if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); + regs.r9 = va_arg(ap, u64); vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, u8 indent) { struct kvm_regs regs; struct kvm_sregs sregs; @@ -1035,7 +1190,7 @@ const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) return list; } -bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) +bool kvm_msr_is_in_save_restore_list(u32 msr_index) { const struct kvm_msr_list *list = kvm_get_msr_index_list(); int i; @@ -1156,7 +1311,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + if (is_sev_vm(vm)) { vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; @@ -1166,7 +1321,7 @@ void kvm_init_vm_address_properties(struct kvm_vm *vm) } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, - uint32_t function, uint32_t index) + u32 function, u32 index) { int i; @@ -1183,7 +1338,7 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, #define X86_HYPERCALL(inputs...) \ ({ \ - uint64_t r; \ + u64 r; \ \ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \ "jnz 1f\n\t" \ @@ -1192,23 +1347,23 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, "1: vmmcall\n\t" \ "2:" \ : "=a"(r) \ - : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \ + : [use_vmmcall] "r" (host_cpu_is_amd_compatible), \ + inputs); \ \ r; \ }) -uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, - uint64_t a3) +u64 kvm_hypercall(u64 nr, u64 a0, u64 a1, u64 a2, u64 a3) { return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); } -uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +u64 __xen_hypercall(u64 nr, u64 a0, void *a1) { return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1)); } -void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) +void xen_hypercall(u64 nr, u64 a0, void *a1) { GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } @@ -1217,7 +1372,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr, guest_maxphyaddr; + u8 maxphyaddr, guest_maxphyaddr; /* * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR @@ -1232,8 +1387,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; - /* Avoid reserved HyperTransport region on AMD processors. */ - if (!host_cpu_is_amd) + /* Avoid reserved HyperTransport region on AMD or Hygon processors. */ + if (!host_cpu_is_amd_compatible) return max_gfn; /* On parts with <40 physical address bits, the area is fully hidden */ @@ -1247,7 +1402,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* * Otherwise it's at the top of the physical address space, possibly - * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * reduced due to SME or CSV by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. */ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) @@ -1264,21 +1419,15 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); + host_cpu_is_hygon = this_cpu_is_hygon(); + host_cpu_is_amd_compatible = host_cpu_is_amd || host_cpu_is_hygon; is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); + + kvm_init_pmu_errata(); } bool sys_clocksource_is_based_on_tsc(void) @@ -1291,3 +1440,32 @@ bool sys_clocksource_is_based_on_tsc(void) return ret; } + +bool kvm_arch_has_default_irqchip(void) +{ + return true; +} + +void setup_smram(struct kvm_vm *vm, struct kvm_vcpu *vcpu, u64 smram_gpa, + const void *smi_handler, size_t handler_size) +{ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, smram_gpa, + SMRAM_MEMSLOT, SMRAM_PAGES, 0); + TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, smram_gpa, + SMRAM_MEMSLOT) == smram_gpa, + "Could not allocate guest physical addresses for SMRAM"); + + memset(addr_gpa2hva(vm, smram_gpa), 0x0, SMRAM_SIZE); + memcpy(addr_gpa2hva(vm, smram_gpa) + 0x8000, smi_handler, handler_size); + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, smram_gpa); +} + +void inject_smi(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vcpu, &events); + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + vcpu_events_set(vcpu, &events); +} diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c index e9535ee20b7f..93f916903461 100644 --- a/tools/testing/selftests/kvm/lib/x86/sev.c +++ b/tools/testing/selftests/kvm/lib/x86/sev.c @@ -14,35 +14,46 @@ * and find the first range, but that's correct because the condition * expression would cause us to quit the loop. */ -static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region) +static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region, + u8 page_type, bool private) { const struct sparsebit *protected_phy_pages = region->protected_phy_pages; - const vm_paddr_t gpa_base = region->region.guest_phys_addr; + const gpa_t gpa_base = region->region.guest_phys_addr; const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift; sparsebit_idx_t i, j; if (!sparsebit_any_set(protected_phy_pages)) return; - sev_register_encrypted_memory(vm, region); + if (!is_sev_snp_vm(vm)) + sev_register_encrypted_memory(vm, region); sparsebit_for_each_set_range(protected_phy_pages, i, j) { - const uint64_t size = (j - i + 1) * vm->page_size; - const uint64_t offset = (i - lowest_page_in_region) * vm->page_size; + const u64 size = (j - i + 1) * vm->page_size; + const u64 offset = (i - lowest_page_in_region) * vm->page_size; + + if (private) + vm_mem_set_private(vm, gpa_base + offset, size); + + if (is_sev_snp_vm(vm)) + snp_launch_update_data(vm, gpa_base + offset, + (u64)addr_gpa2hva(vm, gpa_base + offset), + size, page_type); + else + sev_launch_update_data(vm, gpa_base + offset, size); - sev_launch_update_data(vm, gpa_base + offset, size); } } void sev_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } @@ -50,17 +61,25 @@ void sev_vm_init(struct kvm_vm *vm) void sev_es_vm_init(struct kvm_vm *vm) { if (vm->type == KVM_X86_DEFAULT_VM) { - assert(vm->arch.sev_fd == -1); + TEST_ASSERT_EQ(vm->arch.sev_fd, -1); vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); } else { struct kvm_sev_init init = { 0 }; - assert(vm->type == KVM_X86_SEV_ES_VM); + TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM); vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); } } -void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) +void snp_vm_init(struct kvm_vm *vm) +{ + struct kvm_sev_init init = { 0 }; + + TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); +} + +void sev_vm_launch(struct kvm_vm *vm, u32 policy) { struct kvm_sev_launch_start launch_start = { .policy = policy, @@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE); hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) - encrypt_region(vm, region); + encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false); if (policy & SEV_POLICY_ES) vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); @@ -84,7 +103,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) vm->arch.is_pt_protected = true; } -void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement) +void sev_vm_launch_measure(struct kvm_vm *vm, u8 *measurement) { struct kvm_sev_launch_measure launch_measure; struct kvm_sev_guest_status guest_status; @@ -112,7 +131,34 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, +void snp_vm_launch_start(struct kvm_vm *vm, u64 policy) +{ + struct kvm_sev_snp_launch_start launch_start = { + .policy = policy, + }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start); +} + +void snp_vm_launch_update(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + int ctr; + + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) + encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true); + + vm->arch.is_pt_protected = true; +} + +void snp_vm_launch_finish(struct kvm_vm *vm) +{ + struct kvm_sev_snp_launch_finish launch_finish = { 0 }; + + vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish); +} + +struct kvm_vm *vm_sev_create_with_one_vcpu(u32 type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { @@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, return vm; } -void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +void vm_sev_launch(struct kvm_vm *vm, u64 policy, u8 *measurement) { + if (is_sev_snp_vm(vm)) { + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE)); + + snp_vm_launch_start(vm, policy); + + snp_vm_launch_update(vm); + + snp_vm_launch_finish(vm); + + return; + } + sev_vm_launch(vm, policy); if (!measurement) diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index d239c2097391..3b01605ab016 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -28,24 +28,27 @@ u64 rflags; * Pointer to structure with the addresses of the SVM areas. */ struct svm_test_data * -vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +vcpu_alloc_svm(struct kvm_vm *vm, gva_t *p_svm_gva) { - vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm); + gva_t svm_gva = vm_alloc_page(vm); struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); - svm->vmcb = (void *)vm_vaddr_alloc_page(vm); + svm->vmcb = (void *)vm_alloc_page(vm); svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); - svm->save_area = (void *)vm_vaddr_alloc_page(vm); + svm->save_area = (void *)vm_alloc_page(vm); svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); - svm->msr = (void *)vm_vaddr_alloc_page(vm); + svm->msr = (void *)vm_alloc_page(vm); svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr); svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr); memset(svm->msr_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + svm->ncr3_gpa = vm->stage2_mmu.pgd; + *p_svm_gva = svm_gva; return svm; } @@ -59,17 +62,36 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } +void vm_enable_npt(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't supported nested NPT"); + + /* + * NPTs use the same PTE format, but deliberately drop the C-bit as the + * per-VM shared vs. private information is only meant for stage-1. + */ + pte_masks = vm->mmu.arch.pte_masks; + pte_masks.c = 0; + + /* NPT walks are treated as user accesses, so set the 'user' bit. */ + pte_masks.always_set = pte_masks.user; + + tdp_mmu_init(vm, vm->mmu.pgtable_levels, &pte_masks); +} + void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; - uint64_t vmcb_gpa = svm->vmcb_gpa; + u64 vmcb_gpa = svm->vmcb_gpa; struct vmcb_save_area *save = &vmcb->save; struct vmcb_control_area *ctrl = &vmcb->control; u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; - uint64_t efer; + u64 efer; efer = rdmsr(MSR_EFER); wrmsr(MSR_EFER, efer | EFER_SVME); @@ -102,6 +124,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r vmcb->save.rip = (u64)guest_rip; vmcb->save.rsp = (u64)guest_rsp; guest_regs.rdi = (u64)svm; + + if (svm->ncr3_gpa) { + ctrl->misc_ctl |= SVM_MISC_ENABLE_NP; + ctrl->nested_cr3 = svm->ncr3_gpa; + } } /* @@ -131,7 +158,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r * for now. registers involved in LOAD/SAVE_GPR_C are eventually * unmodified so they do not need to be in the clobber list. */ -void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +void run_guest(struct vmcb *vmcb, u64 vmcb_gpa) { asm volatile ( "vmload %[vmcb_gpa]\n\t" diff --git a/tools/testing/selftests/kvm/lib/x86/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c index 1265cecc7dd1..e7dd5791959b 100644 --- a/tools/testing/selftests/kvm/lib/x86/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86/ucall.c @@ -6,9 +6,9 @@ */ #include "kvm_util.h" -#define UCALL_PIO_PORT ((uint16_t)0x1000) +#define UCALL_PIO_PORT ((u16)0x1000) -void ucall_arch_do_ucall(vm_vaddr_t uc) +void ucall_arch_do_ucall(gva_t uc) { /* * FIXME: Revert this hack (the entire commit that added it) once nVMX diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index d4d1208dd023..67642759e4a0 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -10,41 +10,24 @@ #include "processor.h" #include "vmx.h" -#define PAGE_SHIFT_4K 12 - #define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 +#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */ +#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */ +#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */ + +#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT) +#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */ +#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT) + bool enable_evmcs; struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; -struct eptPageTableEntry { - uint64_t readable:1; - uint64_t writable:1; - uint64_t executable:1; - uint64_t memory_type:3; - uint64_t ignore_pat:1; - uint64_t page_size:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t ignored_11_10:2; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t suppress_ve:1; -}; - -struct eptPageTablePointer { - uint64_t memory_type:3; - uint64_t page_walk_length:3; - uint64_t ad_enabled:1; - uint64_t reserved_11_07:5; - uint64_t address:40; - uint64_t reserved_63_52:12; -}; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { - uint16_t evmcs_ver; + u16 evmcs_ver; vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, (unsigned long)&evmcs_ver); @@ -58,6 +41,32 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) return evmcs_ver; } +void vm_enable_ept(struct kvm_vm *vm) +{ + struct pte_masks pte_masks; + + TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); + + /* + * EPTs do not have 'present' or 'user' bits, instead bit 0 is the + * 'readable' bit. + */ + pte_masks = (struct pte_masks) { + .present = 0, + .user = 0, + .readable = BIT_ULL(0), + .writable = BIT_ULL(1), + .executable = BIT_ULL(2), + .huge = BIT_ULL(7), + .accessed = BIT_ULL(8), + .dirty = BIT_ULL(9), + .nx = 0, + }; + + /* TODO: Add support for 5-level EPT. */ + tdp_mmu_init(vm, 4, &pte_masks); +} + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -70,51 +79,54 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) * Pointer to structure with the addresses of the VMX areas. */ struct vmx_pages * -vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) +vcpu_alloc_vmx(struct kvm_vm *vm, gva_t *p_vmx_gva) { - vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm); + gva_t vmx_gva = vm_alloc_page(vm); struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ - vmx->vmxon = (void *)vm_vaddr_alloc_page(vm); + vmx->vmxon = (void *)vm_alloc_page(vm); vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); /* Setup of a region of guest memory for a vmcs. */ - vmx->vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->vmcs = (void *)vm_alloc_page(vm); vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); /* Setup of a region of guest memory for the MSR bitmap. */ - vmx->msr = (void *)vm_vaddr_alloc_page(vm); + vmx->msr = (void *)vm_alloc_page(vm); vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); memset(vmx->msr_hva, 0, getpagesize()); /* Setup of a region of guest memory for the shadow VMCS. */ - vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm); + vmx->shadow_vmcs = (void *)vm_alloc_page(vm); vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ - vmx->vmread = (void *)vm_vaddr_alloc_page(vm); + vmx->vmread = (void *)vm_alloc_page(vm); vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); memset(vmx->vmread_hva, 0, getpagesize()); - vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm); + vmx->vmwrite = (void *)vm_alloc_page(vm); vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); + if (vm->stage2_mmu.pgd_created) + vmx->eptp_gpa = vm->stage2_mmu.pgd; + *p_vmx_gva = vmx_gva; return vmx; } bool prepare_for_vmx_operation(struct vmx_pages *vmx) { - uint64_t feature_control; - uint64_t required; + u64 feature_control; + u64 required; unsigned long cr0; unsigned long cr4; @@ -148,7 +160,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); /* Enter VMX root operation. */ - *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + *(u32 *)(vmx->vmxon) = vmcs_revision(); if (vmxon(vmx->vmxon_gpa)) return false; @@ -158,7 +170,7 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) bool load_vmcs(struct vmx_pages *vmx) { /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + *(u32 *)(vmx->vmcs) = vmcs_revision(); if (vmclear(vmx->vmcs_gpa)) return false; @@ -166,14 +178,14 @@ bool load_vmcs(struct vmx_pages *vmx) return false; /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + *(u32 *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; if (vmclear(vmx->shadow_vmcs_gpa)) return false; return true; } -static bool ept_vpid_cap_supported(uint64_t mask) +static bool ept_vpid_cap_supported(u64 mask) { return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; } @@ -188,7 +200,7 @@ bool ept_1g_pages_supported(void) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { - uint32_t sec_exec_ctl = 0; + u32 sec_exec_ctl = 0; vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); @@ -196,16 +208,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); if (vmx->eptp_gpa) { - uint64_t ept_paddr; - struct eptPageTablePointer eptp = { - .memory_type = X86_MEMTYPE_WB, - .page_walk_length = 3, /* + 1 */ - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, - }; - - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); - vmwrite(EPT_POINTER, ept_paddr); + u64 eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; + + TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, + "Illegal bits set in vmx->eptp_gpa"); + + if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS)) + eptp |= EPTP_AD_ENABLED; + + vmwrite(EPT_POINTER, eptp); sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; } @@ -248,7 +259,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) */ static inline void init_vmcs_host_state(void) { - uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + u32 exit_controls = vmreadz(VM_EXIT_CONTROLS); vmwrite(HOST_ES_SELECTOR, get_es()); vmwrite(HOST_CS_SELECTOR, get_cs()); @@ -347,8 +358,8 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp) vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); vmwrite(GUEST_DR7, 0x400); - vmwrite(GUEST_RSP, (uint64_t)rsp); - vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RSP, (u64)rsp); + vmwrite(GUEST_RIP, (u64)rip); vmwrite(GUEST_RFLAGS, 2); vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); @@ -362,169 +373,12 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -static void nested_create_pte(struct kvm_vm *vm, - struct eptPageTableEntry *pte, - uint64_t nested_paddr, - uint64_t paddr, - int current_level, - int target_level) -{ - if (!pte->readable) { - pte->writable = true; - pte->readable = true; - pte->executable = true; - pte->page_size = (current_level == target_level); - if (pte->page_size) - pte->address = paddr >> vm->page_shift; - else - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. - */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - } -} - - -void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, int target_level) -{ - const uint64_t page_size = PG_LEVEL_SIZE(target_level); - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; - uint16_t index; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " - "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - - TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx requires 5-level paging", - nested_paddr); - TEST_ASSERT((nested_paddr % page_size) == 0, - "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx page_size: 0x%lx", - nested_paddr, page_size); - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT((paddr % page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx page_size: 0x%lx", - paddr, page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - pte = &pt[index]; - - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - - if (pte->page_size) - break; - - pt = addr_gpa2hva(vm, pte->address * vm->page_size); - } - - /* - * For now mark these as accessed and dirty because the only - * testcase we have needs that. Can be reconsidered later. - */ - pte->accessed = true; - pte->dirty = true; - -} - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) -{ - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); -} - -/* - * Map a range of EPT guest physical addresses to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * nested_paddr - Nested guest physical address to map - * paddr - VM Physical Address - * size - The size of the range to map - * level - The level at which to map the range - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a nested guest translation for the - * page range starting at nested_paddr to the page range starting at paddr. - */ -void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - int level) -{ - size_t page_size = PG_LEVEL_SIZE(level); - size_t npages = size / page_size; - - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); - - while (npages--) { - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); - nested_paddr += page_size; - paddr += page_size; - } -} - -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) -{ - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); -} - -/* Prepare an identity extended page table that maps all the - * physical pages in VM. - */ -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t memslot) -{ - sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); - - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; - last = i + (region->region.memory_size >> vm->page_shift); - for (;;) { - i = sparsebit_next_clear(region->unused_phy_pages, i); - if (i > last) - break; - - nested_map(vmx, vm, - (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, - 1 << vm->page_shift); - } -} - -/* Identity map a region with 1GiB Pages. */ -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t addr, uint64_t size) -{ - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); -} - bool kvm_cpu_has_ept(void) { - uint64_t ctrl; + u64 ctrl; + + if (!kvm_cpu_has(X86_FEATURE_VMX)) + return false; ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) @@ -534,19 +388,9 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) -{ - TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); - - vmx->eptp = (void *)vm_vaddr_alloc_page(vm); - vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); - vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); -} - void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm) { - vmx->apic_access = (void *)vm_vaddr_alloc_page(vm); + vmx->apic_access = (void *)vm_alloc_page(vm); vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); } diff --git a/tools/testing/selftests/kvm/loongarch/arch_timer.c b/tools/testing/selftests/kvm/loongarch/arch_timer.c new file mode 100644 index 000000000000..a7279ded8518 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/arch_timer.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * The test validates periodic/one-shot constant timer IRQ using + * CSR.TCFG and CSR.TVAL registers. + */ +#include "arch_timer.h" +#include "kvm_util.h" +#include "processor.h" +#include "timer_test.h" +#include "ucall_common.h" + +static void do_idle(void) +{ + unsigned int intid; + unsigned long estat; + + __asm__ __volatile__("idle 0" : : : "memory"); + + estat = csr_read(LOONGARCH_CSR_ESTAT); + intid = !!(estat & BIT(INT_TI)); + + /* Make sure pending timer IRQ arrived */ + GUEST_ASSERT_EQ(intid, 1); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid; + u32 cpu = guest_get_vcpuid(); + u64 xcnt, val, cfg, xcnt_diff_us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + intid = !!(regs->estat & BIT(INT_TI)); + + /* Make sure we are dealing with the correct timer IRQ */ + GUEST_ASSERT_EQ(intid, 1); + + cfg = timer_get_cfg(); + if (cfg & CSR_TCFG_PERIOD) { + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter - 1); + if (shared_data->nr_iter == 0) + disable_timer(); + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + + /* + * On real machine, value of LOONGARCH_CSR_TVAL is BIT_ULL(48) - 1 + * On virtual machine, its value counts down from BIT_ULL(48) - 1 + */ + val = timer_get_val(); + xcnt = timer_get_cycles(); + xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); + + /* Basic 'timer condition met' check */ + __GUEST_ASSERT(val > cfg, + "val = 0x%lx, cfg = 0x%lx, xcnt_diff_us = 0x%lx", + val, cfg, xcnt_diff_us); + + csr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); +} + +static void guest_test_period_timer(u32 cpu) +{ + u32 irq_iter, config_iter; + u64 us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = test_args.nr_iter; + shared_data->xcnt = timer_get_cycles(); + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + timer_set_next_cmp_ms(test_args.timer_period_ms, true); + + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + } + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(irq_iter == 0, + "irq_iter = 0x%x.\n" + " Guest period timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + irq_iter); +} + +static void guest_test_oneshot_timer(u32 cpu) +{ + u32 irq_iter, config_iter; + u64 us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + shared_data->nr_iter = 0; + shared_data->guest_stage = 0; + us = msecs_to_usecs(test_args.timer_period_ms) + test_args.timer_err_margin_us; + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + /* Setup a timeout for the interrupt to arrive */ + udelay(us); + + irq_iter = READ_ONCE(shared_data->nr_iter); + __GUEST_ASSERT(config_iter + 1 == irq_iter, + "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n" + " Guest timer interrupt was not triggered within the specified\n" + " interval, try to increase the error margin by [-e] option.\n", + config_iter + 1, irq_iter); + } +} + +static void guest_test_emulate_timer(u32 cpu) +{ + u32 config_iter; + u64 xcnt_diff_us, us; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; + + local_irq_disable(); + shared_data->nr_iter = 0; + us = msecs_to_usecs(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + shared_data->xcnt = timer_get_cycles(); + + /* Setup the next interrupt */ + timer_set_next_cmp_ms(test_args.timer_period_ms, false); + do_idle(); + + xcnt_diff_us = cycles_to_usec(timer_get_cycles() - shared_data->xcnt); + __GUEST_ASSERT(xcnt_diff_us >= us, + "xcnt_diff_us = 0x%lx, us = 0x%lx.\n", + xcnt_diff_us, us); + } + local_irq_enable(); +} + +static void guest_time_count_test(u32 cpu) +{ + u32 config_iter; + unsigned long start, end, prev, us; + + /* Assuming that test case starts to run in 1 second */ + start = timer_get_cycles(); + us = msec_to_cycles(1000); + __GUEST_ASSERT(start <= us, + "start = 0x%lx, us = 0x%lx.\n", + start, us); + + us = msec_to_cycles(test_args.timer_period_ms); + for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) { + start = timer_get_cycles(); + end = start + us; + /* test time count growing up always */ + while (start < end) { + prev = start; + start = timer_get_cycles(); + __GUEST_ASSERT(prev <= start, + "prev = 0x%lx, start = 0x%lx.\n", + prev, start); + } + } +} + +static void guest_code(void) +{ + u32 cpu = guest_get_vcpuid(); + + /* must run at first */ + guest_time_count_test(cpu); + + timer_irq_enable(); + local_irq_enable(); + guest_test_period_timer(cpu); + guest_test_oneshot_timer(cpu); + guest_test_emulate_timer(cpu); + + GUEST_DONE(); +} + +struct kvm_vm *test_vm_create(void) +{ + struct kvm_vm *vm; + int nr_vcpus = test_args.nr_vcpus; + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + vm_init_descriptor_tables(vm); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + + /* Make all the test's cmdline args visible to the guest */ + sync_global_to_guest(vm, test_args); + + return vm; +} + +void test_vm_cleanup(struct kvm_vm *vm) +{ + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/loongarch/pmu_test.c b/tools/testing/selftests/kvm/loongarch/pmu_test.c new file mode 100644 index 000000000000..ec3fefb9ea97 --- /dev/null +++ b/tools/testing/selftests/kvm/loongarch/pmu_test.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LoongArch KVM PMU event counting test + * + * Test hardware event counting: CPU_CYCLES, INSTR_RETIRED, + * BRANCH_INSTRUCTIONS and BRANCH_MISSES. + */ +#include <linux/bitops.h> +#include "kvm_util.h" +#include "pmu.h" +#include "loongarch/processor.h" + +static int pmu_irq_count; + +/* Check PMU support */ +static bool has_pmu_support(void) +{ + u32 cfg6; + + /* Read CPUCFG6 to check PMU */ + cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); + + /* Check PMU present bit */ + if (!(cfg6 & CPUCFG6_PMP)) + return false; + + /* Check that at least one counter exists */ + if (((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) == 0) + return false; + + return true; +} + +/* Dump PMU capabilities */ +static void dump_pmu_caps(void) +{ + u32 cfg6; + int nr_counters, counter_bits; + + cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); + nr_counters = ((cfg6 & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; + counter_bits = ((cfg6 & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; + + pr_info("PMU capabilities:\n"); + pr_info(" Counters present: %s\n", cfg6 & CPUCFG6_PMP ? "yes" : "no"); + pr_info(" Number of counters: %d\n", nr_counters); + pr_info(" Counter width: %d bits\n", counter_bits); +} + +/* Guest test code - runs inside VM */ +static void guest_pmu_base_test(void) +{ + int i; + u32 cfg6, pmnum; + u64 cnt[4]; + + cfg6 = read_cpucfg(LOONGARCH_CPUCFG6); + pmnum = (cfg6 >> 4) & 0xf; + GUEST_PRINTF("CPUCFG6 = 0x%x\n", cfg6); + GUEST_PRINTF("PMP enabled: %s\n", (cfg6 & 0x1) ? "YES" : "NO"); + GUEST_PRINTF("Number of counters (PMNUM): %x\n", pmnum + 1); + GUEST_ASSERT(pmnum == 3); + + GUEST_PRINTF("Clean csr_perfcntr0-3\n"); + csr_write(0, LOONGARCH_CSR_PERFCNTR0); + csr_write(0, LOONGARCH_CSR_PERFCNTR1); + csr_write(0, LOONGARCH_CSR_PERFCNTR2); + csr_write(0, LOONGARCH_CSR_PERFCNTR3); + GUEST_PRINTF("Set csr_perfctrl0 for cycles event\n"); + csr_write(PMU_ENVENT_ENABLED | + LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0); + GUEST_PRINTF("Set csr_perfctrl1 for instr_retired event\n"); + csr_write(PMU_ENVENT_ENABLED | + LOONGARCH_PMU_EVENT_INSTR_RETIRED, LOONGARCH_CSR_PERFCTRL1); + GUEST_PRINTF("Set csr_perfctrl2 for branch_instructions event\n"); + csr_write(PMU_ENVENT_ENABLED | + PERF_COUNT_HW_BRANCH_INSTRUCTIONS, LOONGARCH_CSR_PERFCTRL2); + GUEST_PRINTF("Set csr_perfctrl3 for branch_misses event\n"); + csr_write(PMU_ENVENT_ENABLED | + PERF_COUNT_HW_BRANCH_MISSES, LOONGARCH_CSR_PERFCTRL3); + + for (i = 0; i < NUM_LOOPS; i++) + cpu_relax(); + + cnt[0] = csr_read(LOONGARCH_CSR_PERFCNTR0); + GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt[0]); + cnt[1] = csr_read(LOONGARCH_CSR_PERFCNTR1); + GUEST_PRINTF("csr_perfcntr1 is %lx\n", cnt[1]); + cnt[2] = csr_read(LOONGARCH_CSR_PERFCNTR2); + GUEST_PRINTF("csr_perfcntr2 is %lx\n", cnt[2]); + cnt[3] = csr_read(LOONGARCH_CSR_PERFCNTR3); + GUEST_PRINTF("csr_perfcntr3 is %lx\n", cnt[3]); + + GUEST_PRINTF("assert csr_perfcntr0 >EXPECTED_CYCLES_MIN && csr_perfcntr0 < UPPER_BOUND\n"); + GUEST_ASSERT(cnt[0] > EXPECTED_CYCLES_MIN && cnt[0] < UPPER_BOUND); + GUEST_PRINTF("assert csr_perfcntr1 > EXPECTED_INSTR_MIN && csr_perfcntr1 < UPPER_BOUND\n"); + GUEST_ASSERT(cnt[1] > EXPECTED_INSTR_MIN && cnt[1] < UPPER_BOUND); + GUEST_PRINTF("assert csr_perfcntr2 > 0 && csr_perfcntr2 < UPPER_BOUND\n"); + GUEST_ASSERT(cnt[2] > 0 && cnt[2] < UPPER_BOUND); + GUEST_PRINTF("assert csr_perfcntr3 > 0 && csr_perfcntr3 < UPPER_BOUND\n"); + GUEST_ASSERT(cnt[3] > 0 && cnt[3] < UPPER_BOUND); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid; + + pmu_irq_disable(); + intid = !!(regs->estat & BIT(INT_PMI)); + GUEST_ASSERT_EQ(intid, 1); + GUEST_PRINTF("Get PMU interrupt\n"); + WRITE_ONCE(pmu_irq_count, pmu_irq_count + 1); +} + +static void guest_pmu_interrupt_test(void) +{ + u64 cnt; + + csr_write(PMU_OVERFLOW - 1, LOONGARCH_CSR_PERFCNTR0); + csr_write(PMU_ENVENT_ENABLED | CSR_PERFCTRL_PMIE | LOONGARCH_PMU_EVENT_CYCLES, LOONGARCH_CSR_PERFCTRL0); + + cpu_relax(); + + GUEST_ASSERT_EQ(pmu_irq_count, 1); + cnt = csr_read(LOONGARCH_CSR_PERFCNTR0); + GUEST_PRINTF("csr_perfcntr0 is %lx\n", cnt); + GUEST_PRINTF("PMU interrupt test success\n"); + +} + +static void guest_code(void) +{ + guest_pmu_base_test(); + + pmu_irq_enable(); + local_irq_enable(); + guest_pmu_interrupt_test(); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + struct kvm_device_attr attr; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + /* Check host KVM PMU support */ + if (!has_pmu_support()) { + print_skip("PMU not supported by host hardware\n"); + dump_pmu_caps(); + return KSFT_SKIP; + } + pr_info("Host support PMU\n"); + + /* Dump PMU capabilities */ + dump_pmu_caps(); + + vm = vm_create(VM_MODE_P47V47_16K); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + pmu_irq_count = 0; + vm_init_descriptor_tables(vm); + loongarch_vcpu_setup(vcpu); + vm_install_exception_handler(vm, EXCCODE_INT, guest_irq_handler); + sync_global_to_guest(vm, pmu_irq_count); + + attr.group = KVM_LOONGARCH_VM_FEAT_CTRL, + attr.attr = KVM_LOONGARCH_VM_FEAT_PMU, + + ret = ioctl(vm->fd, KVM_HAS_DEVICE_ATTR, &attr); + + if (ret == 0) { + pr_info("PMU is enabled in VM\n"); + } else { + print_skip("PMU not enabled by VM config\n"); + return KSFT_SKIP; + } + + while (1) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_PRINTF: + printf("%s", (const char *)uc.buffer); + break; + case UCALL_DONE: + printf("PMU test PASSED\n"); + goto done; + case UCALL_ABORT: + printf("PMU test FAILED\n"); + ret = -1; + goto done; + default: + printf("Unexpected exit\n"); + ret = -1; + goto done; + } + } + +done: + kvm_vm_free(vm); + return ret; +} diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index c81a84990eab..9c7578a098c3 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -22,6 +22,7 @@ #include "processor.h" #include "test_util.h" #include "guest_modes.h" +#include "ucall_common.h" #define DUMMY_MEMSLOT_INDEX 7 @@ -29,7 +30,7 @@ static int nr_vcpus = 1; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) { @@ -54,10 +55,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) } static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, - uint64_t nr_modifications) + u64 nr_modifications) { - uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size; - uint64_t gpa; + u64 pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size; + gpa_t gpa; int i; /* @@ -77,7 +78,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, struct test_params { useconds_t delay; - uint64_t nr_iterations; + u64 nr_iterations; bool partition_vcpu_memory_access; bool disable_slot_zap_quirk; }; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index e3711beff7f3..e977e979470f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -15,7 +15,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <sys/mman.h> #include <time.h> #include <unistd.h> @@ -23,8 +22,10 @@ #include <linux/sizes.h> #include <test_util.h> +#include <kvm_syscalls.h> #include <kvm_util.h> #include <processor.h> +#include <ucall_common.h> #define MEM_EXTRA_SIZE SZ_64K @@ -84,17 +85,17 @@ struct vm_data { struct kvm_vm *vm; struct kvm_vcpu *vcpu; pthread_t vcpu_thread; - uint32_t nslots; - uint64_t npages; - uint64_t pages_per_slot; + u32 nslots; + u64 npages; + u64 pages_per_slot; void **hva_slots; bool mmio_ok; - uint64_t mmio_gpa_min; - uint64_t mmio_gpa_max; + u64 mmio_gpa_min; + u64 mmio_gpa_max; }; struct sync_area { - uint32_t guest_page_size; + u32 guest_page_size; atomic_bool start_flag; atomic_bool exit_flag; atomic_bool sync_flag; @@ -185,12 +186,12 @@ static void wait_for_vcpu(void) "sem_timedwait() failed: %d", errno); } -static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) +static void *vm_gpa2hva(struct vm_data *data, gpa_t gpa, u64 *rempages) { - uint64_t gpage, pgoffs; - uint32_t slot, slotoffs; + gpa_t gpage, pgoffs; + u32 slot, slotoffs; void *base; - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size, @@ -199,11 +200,11 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) gpage = gpa / guest_page_size; pgoffs = gpa % guest_page_size; - slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); + slot = min(gpage / data->pages_per_slot, (u64)data->nslots - 1); slotoffs = gpage - (slot * data->pages_per_slot); if (rempages) { - uint64_t slotpages; + u64 slotpages; if (slot == data->nslots - 1) slotpages = data->npages - slot * data->pages_per_slot; @@ -216,12 +217,12 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) } base = data->hva_slots[slot]; - return (uint8_t *)base + slotoffs * guest_page_size + pgoffs; + return (u8 *)base + slotoffs * guest_page_size + pgoffs; } -static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) +static u64 vm_slot2gpa(struct vm_data *data, u32 slot) { - uint32_t guest_page_size = data->vm->page_size; + u32 guest_page_size = data->vm->page_size; TEST_ASSERT(slot < data->nslots, "Too high slot number"); @@ -242,8 +243,8 @@ static struct vm_data *alloc_vm(void) return data; } -static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, - uint64_t pages_per_slot, uint64_t rempages) +static bool check_slot_pages(u32 host_page_size, u32 guest_page_size, + u64 pages_per_slot, u64 rempages) { if (!pages_per_slot) return false; @@ -258,11 +259,11 @@ static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size, } -static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) +static u64 get_max_slots(struct vm_data *data, u32 host_page_size) { - uint32_t guest_page_size = data->vm->page_size; - uint64_t mempages, pages_per_slot, rempages; - uint64_t slots; + u32 guest_page_size = data->vm->page_size; + u64 mempages, pages_per_slot, rempages; + u64 slots; mempages = data->npages; slots = data->nslots; @@ -280,13 +281,13 @@ static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size) return 0; } -static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, - void *guest_code, uint64_t mem_size, +static bool prepare_vm(struct vm_data *data, int nslots, u64 *maxslots, + void *guest_code, u64 mem_size, struct timespec *slot_runtime) { - uint64_t mempages, rempages; - uint64_t guest_addr; - uint32_t slot, host_page_size, guest_page_size; + u64 mempages, rempages; + u64 guest_addr; + u32 slot, host_page_size, guest_page_size; struct timespec tstart; struct sync_area *sync; @@ -316,7 +317,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, clock_gettime(CLOCK_MONOTONIC, &tstart); for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { - uint64_t npages; + u64 npages; npages = data->pages_per_slot; if (slot == data->nslots) @@ -330,8 +331,8 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, *slot_runtime = timespec_elapsed(tstart); for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) { - uint64_t npages; - uint64_t gpa; + u64 npages; + gpa_t gpa; npages = data->pages_per_slot; if (slot == data->nslots) @@ -447,7 +448,7 @@ static bool guest_perform_sync(void) static void guest_code_test_memslot_move(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); GUEST_SYNC(0); @@ -459,7 +460,7 @@ static void guest_code_test_memslot_move(void) for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; /* * No host sync here since the MMIO exits are so expensive @@ -476,7 +477,7 @@ static void guest_code_test_memslot_move(void) static void guest_code_test_memslot_map(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -488,7 +489,7 @@ static void guest_code_test_memslot_map(void) for (ptr = MEM_TEST_GPA; ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; @@ -496,7 +497,7 @@ static void guest_code_test_memslot_map(void) for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_2; + *(u64 *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) break; @@ -525,13 +526,13 @@ static void guest_code_test_memslot_unmap(void) * * Just access a single page to be on the safe side. */ - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; ptr += MEM_TEST_UNMAP_SIZE / 2; - *(uint64_t *)ptr = MEM_TEST_VAL_2; + *(u64 *)ptr = MEM_TEST_VAL_2; if (!guest_perform_sync()) break; @@ -543,7 +544,7 @@ static void guest_code_test_memslot_unmap(void) static void guest_code_test_memslot_rw(void) { struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; - uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); + u32 page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size); GUEST_SYNC(0); @@ -554,17 +555,17 @@ static void guest_code_test_memslot_rw(void) for (ptr = MEM_TEST_GPA; ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) - *(uint64_t *)ptr = MEM_TEST_VAL_1; + *(u64 *)ptr = MEM_TEST_VAL_1; if (!guest_perform_sync()) break; for (ptr = MEM_TEST_GPA + page_size / 2; ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { - uint64_t val = *(uint64_t *)ptr; + u64 val = *(u64 *)ptr; GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); - *(uint64_t *)ptr = 0; + *(u64 *)ptr = 0; } if (!guest_perform_sync()) @@ -576,10 +577,10 @@ static void guest_code_test_memslot_rw(void) static bool test_memslot_move_prepare(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots, bool isactive) + u64 *maxslots, bool isactive) { - uint32_t guest_page_size = data->vm->page_size; - uint64_t movesrcgpa, movetestgpa; + u32 guest_page_size = data->vm->page_size; + u64 movesrcgpa, movetestgpa; #ifdef __x86_64__ if (disable_slot_zap_quirk) @@ -589,7 +590,7 @@ static bool test_memslot_move_prepare(struct vm_data *data, movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { - uint64_t lastpages; + u64 lastpages; vm_gpa2hva(data, movesrcgpa, &lastpages); if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) { @@ -612,21 +613,21 @@ static bool test_memslot_move_prepare(struct vm_data *data, static bool test_memslot_move_prepare_active(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots) + u64 *maxslots) { return test_memslot_move_prepare(data, sync, maxslots, true); } static bool test_memslot_move_prepare_inactive(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots) + u64 *maxslots) { return test_memslot_move_prepare(data, sync, maxslots, false); } static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) { - uint64_t movesrcgpa; + u64 movesrcgpa; movesrcgpa = vm_slot2gpa(data, data->nslots - 1); vm_mem_region_move(data->vm, data->nslots - 1 + 1, @@ -635,13 +636,13 @@ static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) } static void test_memslot_do_unmap(struct vm_data *data, - uint64_t offsp, uint64_t count) + u64 offsp, u64 count) { - uint64_t gpa, ctr; - uint32_t guest_page_size = data->vm->page_size; + gpa_t gpa, ctr; + u32 guest_page_size = data->vm->page_size; for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) { - uint64_t npages; + u64 npages; void *hva; int ret; @@ -660,11 +661,11 @@ static void test_memslot_do_unmap(struct vm_data *data, } static void test_memslot_map_unmap_check(struct vm_data *data, - uint64_t offsp, uint64_t valexp) + u64 offsp, u64 valexp) { - uint64_t gpa; - uint64_t *val; - uint32_t guest_page_size = data->vm->page_size; + gpa_t gpa; + u64 *val; + u32 guest_page_size = data->vm->page_size; if (!map_unmap_verify) return; @@ -679,8 +680,8 @@ static void test_memslot_map_unmap_check(struct vm_data *data, static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) { - uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; + u32 guest_page_size = data->vm->page_size; + u64 guest_pages = MEM_TEST_MAP_SIZE / guest_page_size; /* * Unmap the second half of the test area while guest writes to (maps) @@ -717,11 +718,11 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) static void test_memslot_unmap_loop_common(struct vm_data *data, struct sync_area *sync, - uint64_t chunk) + u64 chunk) { - uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; - uint64_t ctr; + u32 guest_page_size = data->vm->page_size; + u64 guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size; + u64 ctr; /* * Wait for the guest to finish mapping page(s) in the first half @@ -745,9 +746,9 @@ static void test_memslot_unmap_loop_common(struct vm_data *data, static void test_memslot_unmap_loop(struct vm_data *data, struct sync_area *sync) { - uint32_t host_page_size = getpagesize(); - uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_chunk_pages = guest_page_size >= host_page_size ? + u32 host_page_size = getpagesize(); + u32 guest_page_size = data->vm->page_size; + u64 guest_chunk_pages = guest_page_size >= host_page_size ? 1 : host_page_size / guest_page_size; test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); @@ -756,27 +757,27 @@ static void test_memslot_unmap_loop(struct vm_data *data, static void test_memslot_unmap_loop_chunked(struct vm_data *data, struct sync_area *sync) { - uint32_t guest_page_size = data->vm->page_size; - uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; + u32 guest_page_size = data->vm->page_size; + u64 guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size; test_memslot_unmap_loop_common(data, sync, guest_chunk_pages); } static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) { - uint64_t gptr; - uint32_t guest_page_size = data->vm->page_size; + u64 gptr; + u32 guest_page_size = data->vm->page_size; for (gptr = MEM_TEST_GPA + guest_page_size / 2; gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) - *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; + *(u64 *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; host_perform_sync(sync); for (gptr = MEM_TEST_GPA; gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) { - uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); - uint64_t val = *vptr; + u64 *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); + u64 val = *vptr; TEST_ASSERT(val == MEM_TEST_VAL_1, "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")", @@ -789,21 +790,21 @@ static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) struct test_data { const char *name; - uint64_t mem_size; + u64 mem_size; void (*guest_code)(void); bool (*prepare)(struct vm_data *data, struct sync_area *sync, - uint64_t *maxslots); + u64 *maxslots); void (*loop)(struct vm_data *data, struct sync_area *sync); }; -static bool test_execute(int nslots, uint64_t *maxslots, +static bool test_execute(int nslots, u64 *maxslots, unsigned int maxtime, const struct test_data *tdata, - uint64_t *nloops, + u64 *nloops, struct timespec *slot_runtime, struct timespec *guest_runtime) { - uint64_t mem_size = tdata->mem_size ? : MEM_SIZE; + u64 mem_size = tdata->mem_size ? : MEM_SIZE; struct vm_data *data; struct sync_area *sync; struct timespec tstart; @@ -923,8 +924,8 @@ static void help(char *name, struct test_args *targs) static bool check_memory_sizes(void) { - uint32_t host_page_size = getpagesize(); - uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; + u32 host_page_size = getpagesize(); + u32 guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; if (host_page_size > SZ_64K || guest_page_size > SZ_64K) { pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n", @@ -960,7 +961,7 @@ static bool check_memory_sizes(void) static bool parse_args(int argc, char *argv[], struct test_args *targs) { - uint32_t max_mem_slots; + u32 max_mem_slots; int opt; while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { @@ -1039,8 +1040,8 @@ static bool parse_args(int argc, char *argv[], struct test_result { struct timespec slot_runtime, guest_runtime, iter_runtime; - int64_t slottimens, runtimens; - uint64_t nloops; + s64 slottimens, runtimens; + u64 nloops; }; static bool test_loop(const struct test_data *data, @@ -1048,7 +1049,7 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestslottime, struct test_result *rbestruntime) { - uint64_t maxslots; + u64 maxslots; struct test_result result = {}; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 6a437d2be9fa..54d281419d31 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -20,19 +20,19 @@ static bool mprotect_ro_done; static bool all_vcpus_hit_ro_fault; -static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) +static void guest_code(u64 start_gpa, u64 end_gpa, u64 stride) { - uint64_t gpa; + gpa_t gpa; int i; for (i = 0; i < 2; i++) { for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); GUEST_SYNC(i); } for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - *((volatile uint64_t *)gpa); + *((volatile u64 *)gpa); GUEST_SYNC(2); /* @@ -55,7 +55,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #elif defined(__aarch64__) asm volatile("str %0, [%0]" :: "r" (gpa) : "memory"); #else - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); #endif } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault)); @@ -68,7 +68,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) #endif for (gpa = start_gpa; gpa < end_gpa; gpa += stride) - vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa); + vcpu_arch_put_guest(*((volatile u64 *)gpa), gpa); GUEST_SYNC(4); GUEST_ASSERT(0); @@ -76,8 +76,8 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) struct vcpu_info { struct kvm_vcpu *vcpu; - uint64_t start_gpa; - uint64_t end_gpa; + u64 start_gpa; + u64 end_gpa; }; static int nr_vcpus; @@ -203,10 +203,10 @@ static void *vcpu_worker(void *data) } static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, - uint64_t start_gpa, uint64_t end_gpa) + u64 start_gpa, u64 end_gpa) { struct vcpu_info *info; - uint64_t gpa, nr_bytes; + gpa_t gpa, nr_bytes; pthread_t *threads; int i; @@ -217,7 +217,7 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & - ~((uint64_t)vm->page_size - 1); + ~((u64)vm->page_size - 1); TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { @@ -263,8 +263,10 @@ static void calc_default_nr_vcpus(void) TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); - nr_vcpus = CPU_COUNT(&possible_mask) * 3/4; + nr_vcpus = CPU_COUNT(&possible_mask); TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?"); + if (nr_vcpus >= 2) + nr_vcpus = nr_vcpus * 3/4; } int main(int argc, char *argv[]) @@ -276,11 +278,11 @@ int main(int argc, char *argv[]) * just below the 4gb boundary. This test could create memory at * 1gb-3gb,but it's simpler to skip straight to 4gb. */ - const uint64_t start_gpa = SZ_4G; + const u64 start_gpa = SZ_4G; const int first_slot = 1; struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw; - uint64_t max_gpa, gpa, slot_size, max_mem, i; + u64 max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; struct kvm_vcpu **vcpus; @@ -339,14 +341,13 @@ int main(int argc, char *argv[]) TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); - mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + mem = kvm_mmap(slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd); TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ for (i = 0; i < slot_size; i += vm->page_size) - ((uint8_t *)mem)[i] = 0xaa; + ((u8 *)mem)[i] = 0xaa; gpa = 0; for (slot = first_slot; slot < max_slots; slot++) { @@ -361,11 +362,9 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); + virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); + virt_map(vm, gpa, gpa, slot_size >> vm->page_shift); #endif } @@ -413,7 +412,7 @@ int main(int argc, char *argv[]) for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2) vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL); - munmap(mem, slot_size / 2); + kvm_munmap(mem, slot_size / 2); /* Sanity check that the vCPUs actually ran. */ for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c index 0350a8896a2f..fcb57fd034e6 100644 --- a/tools/testing/selftests/kvm/pre_fault_memory_test.c +++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c @@ -10,19 +10,20 @@ #include <test_util.h> #include <kvm_util.h> #include <processor.h> +#include <pthread.h> /* Arbitrarily chosen values */ #define TEST_SIZE (SZ_2M + PAGE_SIZE) #define TEST_NPAGES (TEST_SIZE / PAGE_SIZE) #define TEST_SLOT 10 -static void guest_code(uint64_t base_gpa) +static void guest_code(u64 base_gva) { - volatile uint64_t val __used; + volatile u64 val __used; int i; for (i = 0; i < TEST_NPAGES; i++) { - uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE); + u64 *src = (u64 *)(base_gva + i * PAGE_SIZE); val = *src; } @@ -30,18 +31,66 @@ static void guest_code(uint64_t base_gpa) GUEST_DONE(); } -static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, - u64 left) +struct slot_worker_data { + struct kvm_vm *vm; + gpa_t gpa; + u32 flags; + bool worker_ready; + bool prefault_ready; + bool recreate_slot; +}; + +static void *delete_slot_worker(void *__data) +{ + struct slot_worker_data *data = __data; + struct kvm_vm *vm = data->vm; + + WRITE_ONCE(data->worker_ready, true); + + while (!READ_ONCE(data->prefault_ready)) + cpu_relax(); + + vm_mem_region_delete(vm, TEST_SLOT); + + while (!READ_ONCE(data->recreate_slot)) + cpu_relax(); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, data->gpa, + TEST_SLOT, TEST_NPAGES, data->flags); + + return NULL; +} + +static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 base_gpa, u64 offset, + u64 size, u64 expected_left, bool private) { struct kvm_pre_fault_memory range = { - .gpa = gpa, + .gpa = base_gpa + offset, .size = size, .flags = 0, }; - u64 prev; + struct slot_worker_data data = { + .vm = vcpu->vm, + .gpa = base_gpa, + .flags = private ? KVM_MEM_GUEST_MEMFD : 0, + }; + bool slot_recreated = false; + pthread_t slot_worker; int ret, save_errno; + u64 prev; + + /* + * Concurrently delete (and recreate) the slot to test KVM's handling + * of a racing memslot deletion with prefaulting. + */ + pthread_create(&slot_worker, NULL, delete_slot_worker, &data); + + while (!READ_ONCE(data.worker_ready)) + cpu_relax(); - do { + WRITE_ONCE(data.prefault_ready, true); + + for (;;) { prev = range.size; ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range); save_errno = errno; @@ -49,22 +98,70 @@ static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size, "%sexpecting range.size to change on %s", ret < 0 ? "not " : "", ret < 0 ? "failure" : "success"); - } while (ret >= 0 ? range.size : save_errno == EINTR); - TEST_ASSERT(range.size == left, - "Completed with %lld bytes left, expected %" PRId64, - range.size, left); + /* + * Immediately retry prefaulting if KVM was interrupted by an + * unrelated signal/event. + */ + if (ret < 0 && save_errno == EINTR) + continue; + + /* + * Tell the worker to recreate the slot in order to complete + * prefaulting (if prefault didn't already succeed before the + * slot was deleted) and/or to prepare for the next testcase. + * Wait for the worker to exit so that the next invocation of + * prefaulting is guaranteed to complete (assuming no KVM bugs). + */ + if (!slot_recreated) { + WRITE_ONCE(data.recreate_slot, true); + pthread_join(slot_worker, NULL); + slot_recreated = true; + + /* + * Retry prefaulting to get a stable result, i.e. to + * avoid seeing random EAGAIN failures. Don't retry if + * prefaulting already succeeded, as KVM disallows + * prefaulting with size=0, i.e. blindly retrying would + * result in test failures due to EINVAL. KVM should + * always return success if all bytes are prefaulted, + * i.e. there is no need to guard against EAGAIN being + * returned. + */ + if (range.size) + continue; + } + + /* + * All done if there are no remaining bytes to prefault, or if + * prefaulting failed (EINTR was handled above, and EAGAIN due + * to prefaulting a memslot that's being actively deleted should + * be impossible since the memslot has already been recreated). + */ + if (!range.size || ret < 0) + break; + } - if (left == 0) - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT(range.size == expected_left, + "Completed with %llu bytes left, expected %lu", + range.size, expected_left); + + /* + * Assert success if prefaulting the entire range should succeed, i.e. + * complete with no bytes remaining. Otherwise prefaulting should have + * failed due to ENOENT (due to RET_PF_EMULATE for emulated MMIO when + * no memslot exists). + */ + if (!expected_left) + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); else - /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */ - __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, - "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm); + TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT, + KVM_PRE_FAULT_MEMORY, ret, vcpu->vm); } static void __test_pre_fault_memory(unsigned long vm_type, bool private) { + gpa_t gpa, gva, alignment, guest_page_size; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, .type = vm_type, @@ -74,34 +171,26 @@ static void __test_pre_fault_memory(unsigned long vm_type, bool private) struct kvm_vm *vm; struct ucall uc; - uint64_t guest_test_phys_mem; - uint64_t guest_test_virt_mem; - uint64_t alignment, guest_page_size; - vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code); alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size; - guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size; -#ifdef __s390x__ - alignment = max(0x100000UL, guest_page_size); -#else + gpa = (vm->max_gfn - TEST_NPAGES) * guest_page_size; alignment = SZ_2M; -#endif - guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); - guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1); + gpa = align_down(gpa, alignment); + gva = gpa & ((1ULL << (vm->va_bits - 1)) - 1); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, TEST_SLOT, TEST_NPAGES, - private ? KVM_MEM_GUEST_MEMFD : 0); - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, TEST_SLOT, + TEST_NPAGES, private ? KVM_MEM_GUEST_MEMFD : 0); + virt_map(vm, gva, gpa, TEST_NPAGES); if (private) - vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0); - pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE); - pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE); + vm_mem_set_private(vm, gpa, TEST_SIZE); + + pre_fault_memory(vcpu, gpa, 0, SZ_2M, 0, private); + pre_fault_memory(vcpu, gpa, SZ_2M, PAGE_SIZE * 2, PAGE_SIZE, private); + pre_fault_memory(vcpu, gpa, TEST_SIZE, PAGE_SIZE, PAGE_SIZE, private); - vcpu_args_set(vcpu, 1, guest_test_virt_mem); + vcpu_args_set(vcpu, 1, gva); vcpu_run(vcpu); run = vcpu->run; diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 9e370800a6a2..d67c918ee310 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -15,11 +15,11 @@ static int timer_irq = IRQ_S_TIMER; -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { - uint64_t xcnt, xcnt_diff_us, cmp; + u64 xcnt, xcnt_diff_us, cmp; unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG; - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; timer_irq_disable(); @@ -40,7 +40,7 @@ static void guest_irq_handler(struct ex_regs *regs) static void guest_run(struct test_vcpu_shared_data *shared_data) { - uint32_t irq_iter, config_iter; + u32 irq_iter, config_iter; shared_data->nr_iter = 0; shared_data->guest_stage = 0; @@ -66,7 +66,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data) static void guest_code(void) { - uint32_t cpu = guest_get_vcpuid(); + u32 cpu = guest_get_vcpuid(); struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu]; timer_irq_disable(); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index cfed6c727bfc..3f44b045a22e 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -8,10 +8,10 @@ #include "kvm_util.h" #include "ucall_common.h" -#define LABEL_ADDRESS(v) ((uint64_t)&(v)) +#define LABEL_ADDRESS(v) ((u64)&(v)) extern unsigned char sw_bp_1, sw_bp_2; -static uint64_t sw_bp_addr; +static u64 sw_bp_addr; static void guest_code(void) { @@ -27,7 +27,7 @@ static void guest_code(void) GUEST_DONE(); } -static void guest_breakpoint_handler(struct ex_regs *regs) +static void guest_breakpoint_handler(struct pt_regs *regs) { WRITE_ONCE(sw_bp_addr, regs->epc); regs->epc += 4; @@ -37,7 +37,7 @@ int main(void) { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint64_t pc; + u64 pc; struct kvm_guest_debug debug = { .control = KVM_GUESTDBG_ENABLE, }; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8515921dfdbf..8d6fdb5d38b8 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -17,6 +17,15 @@ enum { VCPU_FEATURE_SBI_EXT, }; +enum { + KVM_RISC_V_REG_OFFSET_VSTART = 0, + KVM_RISC_V_REG_OFFSET_VL, + KVM_RISC_V_REG_OFFSET_VTYPE, + KVM_RISC_V_REG_OFFSET_VCSR, + KVM_RISC_V_REG_OFFSET_VLENB, + KVM_RISC_V_REG_OFFSET_MAX, +}; + static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; bool filter_reg(__u64 reg) @@ -53,8 +62,11 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALASR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: @@ -67,11 +79,14 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCLSD: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFBFMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: @@ -81,6 +96,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZILSD: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: @@ -92,6 +108,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFBFWMA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: @@ -117,6 +135,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_MPXY: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: return true; @@ -141,11 +161,43 @@ bool check_reject_set(int err) return err == EINVAL; } +static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s, + u64 feature) +{ + unsigned long vlenb_reg = 0; + int rc; + u64 reg, size; + + /* Enable V extension so that we can get the vlenb register */ + rc = __vcpu_set_reg(vcpu, feature, 1); + if (rc) + return rc; + + vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]); + if (!vlenb_reg) { + TEST_FAIL("Can't compute vector register size from zero vlenb\n"); + return -EPERM; + } + + size = __builtin_ctzl(vlenb_reg); + size <<= KVM_REG_SIZE_SHIFT; + + for (int i = 0; i < 32; i++) { + reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i); + s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg; + } + + /* We should assert if disabling failed here while enabling succeeded before */ + vcpu_set_reg(vcpu, feature, 0); + + return 0; +} + void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; struct vcpu_reg_sublist *s; - uint64_t feature; + u64 feature; int rc; for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) @@ -170,6 +222,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) if (!s->feature) continue; + if (s->feature == KVM_RISCV_ISA_EXT_V) { + feature = RISCV_ISA_EXT_REG(s->feature); + rc = override_vector_reg_size(vcpu, s, feature); + if (rc) + goto skip; + } + switch (s->feature_type) { case VCPU_FEATURE_ISA_EXT: feature = RISCV_ISA_EXT_REG(s->feature); @@ -184,6 +243,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) /* Try to enable the desired extension */ __vcpu_set_reg(vcpu, feature, 1); +skip: /* Double check whether the desired extension was enabled */ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature), "%s not available, skipping tests", s->name); @@ -204,6 +264,8 @@ static const char *config_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbop_block_size)"; case KVM_REG_RISCV_CONFIG_REG(mvendorid): return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; case KVM_REG_RISCV_CONFIG_REG(marchid): @@ -408,6 +470,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) return strdup_printf("%lld /* UNKNOWN */", reg_off); } +static const char *vector_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_v_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR); + int reg_index = 0; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR); + + if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0)) + reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0); + switch (reg_off) { + case KVM_REG_RISCV_VECTOR_REG(0) ... + KVM_REG_RISCV_VECTOR_REG(31): + return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index); + case KVM_REG_RISCV_VECTOR_CSR_REG(vstart): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vl): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vtype): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)"; + case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb): + return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)"; + } + + return strdup_printf("%lld /* UNKNOWN */", reg_off); +} + #define KVM_ISA_EXT_ARR(ext) \ [KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext @@ -434,8 +525,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALASR), + KVM_ISA_EXT_ARR(ZALRSC), KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), @@ -448,11 +542,14 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZCB), KVM_ISA_EXT_ARR(ZCD), KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCLSD), KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFBFMIN), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOP), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), @@ -462,6 +559,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZILSD), KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), @@ -473,6 +571,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(ZTSO), KVM_ISA_EXT_ARR(ZVBB), KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFBFMIN), + KVM_ISA_EXT_ARR(ZVFBFWMA), KVM_ISA_EXT_ARR(ZVFH), KVM_ISA_EXT_ARR(ZVFHMIN), KVM_ISA_EXT_ARR(ZVKB), @@ -545,6 +645,8 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_FWFT), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_MPXY), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), }; @@ -601,6 +703,19 @@ static const char *sbi_sta_id_to_str(__u64 reg_off) return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); } +static const char *sbi_fwft_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable)"; + case 1: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags)"; + case 2: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value)"; + case 3: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable)"; + case 4: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags)"; + case 5: return "KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_FWFT | %lld /* UNKNOWN */", reg_off); +} + static const char *sbi_id_to_str(const char *prefix, __u64 id) { __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); @@ -613,6 +728,8 @@ static const char *sbi_id_to_str(const char *prefix, __u64 id) switch (reg_subtype) { case KVM_REG_RISCV_SBI_STA: return sbi_sta_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_FWFT: + return sbi_fwft_id_to_str(reg_off); } return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); @@ -635,6 +752,9 @@ void print_reg(const char *prefix, __u64 id) case KVM_REG_SIZE_U128: reg_size = "KVM_REG_SIZE_U128"; break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; default: printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n", (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK); @@ -666,6 +786,10 @@ void print_reg(const char *prefix, __u64 id) printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", reg_size, fp_d_id_to_str(prefix, id)); break; + case KVM_REG_RISCV_VECTOR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n", + reg_size, vector_id_to_str(prefix, id)); + break; case KVM_REG_RISCV_ISA_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", reg_size, isa_ext_id_to_str(prefix, id)); @@ -691,10 +815,13 @@ void print_reg(const char *prefix, __u64 id) */ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), @@ -770,11 +897,26 @@ static __u64 sbi_sta_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), }; +static __u64 sbi_fwft_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_FWFT, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(misaligned_deleg.value), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.enable), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.flags), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_FWFT | KVM_REG_RISCV_SBI_FWFT_REG(pointer_masking.value), +}; + static __u64 zicbom_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, }; +static __u64 zicbop_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbop_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOP, +}; + static __u64 zicboz_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, @@ -870,6 +1012,48 @@ static __u64 fp_d_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, }; +/* Define a default vector registers with length. This will be overwritten at runtime */ +static __u64 vector_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30), + KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V, +}; + #define SUBLIST_BASE \ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} @@ -879,8 +1063,13 @@ static __u64 fp_d_regs[] = { #define SUBLIST_SBI_STA \ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_SBI_FWFT \ + {"sbi-fwft", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_FWFT, \ + .regs = sbi_fwft_regs, .regs_n = ARRAY_SIZE(sbi_fwft_regs),} #define SUBLIST_ZICBOM \ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define SUBLIST_ZICBOP \ + {"zicbop", .feature = KVM_RISCV_ISA_EXT_ZICBOP, .regs = zicbop_regs, .regs_n = ARRAY_SIZE(zicbop_regs),} #define SUBLIST_ZICBOZ \ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} #define SUBLIST_AIA \ @@ -894,6 +1083,9 @@ static __u64 fp_d_regs[] = { {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ .regs_n = ARRAY_SIZE(fp_d_regs),} +#define SUBLIST_V \ + {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),} + #define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ static __u64 regs_##ext[] = { \ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ @@ -958,10 +1150,13 @@ KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP); +KVM_SBI_EXT_SIMPLE_CONFIG(mpxy, MPXY); +KVM_SBI_EXT_SUBLIST_CONFIG(fwft, FWFT); KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SUBLIST_CONFIG(v, V); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); @@ -974,8 +1169,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC); +KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO); KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA); KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS); +KVM_ISA_EXT_SIMPLE_CONFIG(zalasr, ZALASR); +KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC); KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS); KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); @@ -988,11 +1186,14 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); +KVM_ISA_EXT_SIMPLE_CONFIG(zclsd, ZCLSD); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfbfmin, ZFBFMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbop, ZICBOP); KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE); KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); @@ -1002,6 +1203,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zilsd, ZILSD); KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); @@ -1013,6 +1215,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO); KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfmin, ZVFBFMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfbfwma, ZVFBFWMA); KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); @@ -1030,10 +1234,13 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_sbi_pmu, &config_sbi_dbcn, &config_sbi_susp, + &config_sbi_mpxy, + &config_sbi_fwft, &config_aia, &config_fp_f, &config_fp_d, &config_h, + &config_v, &config_smnpm, &config_smstateen, &config_sscofpmf, @@ -1045,8 +1252,11 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_svnapot, &config_svpbmt, &config_svvptc, + &config_zaamo, &config_zabha, &config_zacas, + &config_zalrsc, + &config_zalasr, &config_zawrs, &config_zba, &config_zbb, @@ -1059,11 +1269,14 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zcb, &config_zcd, &config_zcf, + &config_zclsd, &config_zcmop, &config_zfa, + &config_zfbfmin, &config_zfh, &config_zfhmin, &config_zicbom, + &config_zicbop, &config_zicboz, &config_ziccrse, &config_zicntr, @@ -1073,6 +1286,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_zihintntl, &config_zihintpause, &config_zihpm, + &config_zilsd, &config_zimop, &config_zknd, &config_zkne, @@ -1084,6 +1298,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_ztso, &config_zvbb, &config_zvbc, + &config_zvfbfmin, + &config_zvfbfwma, &config_zvfh, &config_zvfhmin, &config_zvkb, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index f45c0ecc902d..e56a3dd6a51e 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -24,7 +24,7 @@ union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS]; /* Snapshot shared memory data */ #define PMU_SNAPSHOT_GPA_BASE BIT(30) static void *snapshot_gva; -static vm_paddr_t snapshot_gpa; +static gpa_t snapshot_gpa; static int vcpu_shared_irq_count; static int counter_in_use; @@ -39,7 +39,13 @@ static bool illegal_handler_invoked; #define SBI_PMU_TEST_SNAPSHOT BIT(2) #define SBI_PMU_TEST_OVERFLOW BIT(3) -static int disabled_tests; +#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5 +struct test_args { + int disabled_tests; + int overflow_irqnum; +}; + +static struct test_args targs; unsigned long pmu_csr_read_num(int csr_num) { @@ -67,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num) switch (csr_num) { switchcase_csr_read_32(CSR_CYCLE, ret) - switchcase_csr_read_32(CSR_CYCLEH, ret) default : break; } @@ -81,7 +86,7 @@ unsigned long pmu_csr_read_num(int csr_num) #undef switchcase_csr_read } -static inline void dummy_func_loop(uint64_t iter) +static inline void dummy_func_loop(u64 iter) { int i = 0; @@ -118,26 +123,44 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, 0, 0, 0); - __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n", - counter, ret.error); + __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld error %ld\n", counter, ret.error); } -static void guest_illegal_exception_handler(struct ex_regs *regs) +static void guest_illegal_exception_handler(struct pt_regs *regs) { + unsigned long insn; + int opcode, csr_num, funct3; + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, "Unexpected exception handler %lx\n", regs->cause); + insn = regs->badaddr; + opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT; + __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM, + "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn); + + csr_num = GET_CSR_NUM(insn); + funct3 = GET_RM(insn); + /* Validate if it is a CSR read/write operation */ + __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4), + "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n", + funct3, csr_num); + + /* Validate if it is a HPMCOUNTER CSR operation */ + __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31), + "Unexpected csr_num 0x%x\n", csr_num); + illegal_handler_invoked = true; /* skip the trapping instruction */ regs->epc += 4; } -static void guest_irq_handler(struct ex_regs *regs) +static void guest_irq_handler(struct pt_regs *regs) { unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; unsigned long overflown_mask; - unsigned long counter_val = 0; /* Validate that we are in the correct irq handler */ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); @@ -151,10 +174,6 @@ static void guest_irq_handler(struct ex_regs *regs) GUEST_ASSERT(overflown_mask & 0x01); WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); - - counter_val = READ_ONCE(snapshot_data->ctr_values[0]); - /* Now start the counter to mimick the real driver behavior */ - start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val); } static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, @@ -240,7 +259,7 @@ static inline void verify_sbi_requirement_assert(void) __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot"); } -static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags) +static void snapshot_set_shmem(gpa_t gpa, unsigned long flags) { unsigned long lo = (unsigned long)gpa; #if __riscv_xlen == 32 @@ -417,6 +436,7 @@ static void test_pmu_basic_sanity(void) struct sbiret ret; int num_counters = 0, i; union sbi_pmu_ctr_info ctrinfo; + unsigned long fw_eidx; probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); GUEST_ASSERT(probe && out_val == 1); @@ -442,7 +462,24 @@ static void test_pmu_basic_sanity(void) pmu_csr_read_num(ctrinfo.csr); GUEST_ASSERT(illegal_handler_invoked); } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) { - read_fw_counter(i, ctrinfo); + /* Read without configure should fail */ + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, + i, 0, 0, 0, 0, 0); + GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM); + + /* + * Try to configure with a common firmware event. + * If configuration succeeds, verify we can read it. + */ + fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) | + SBI_PMU_FW_ACCESS_LOAD; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, + i, 1, 0, fw_eidx, 0, 0); + if (ret.error == 0) { + GUEST_ASSERT(ret.value == i); + read_fw_counter(i, ctrinfo); + } } } @@ -479,7 +516,7 @@ static void test_pmu_events_snaphost(void) static void test_pmu_events_overflow(void) { - int num_counters = 0; + int num_counters = 0, i = 0; /* Verify presence of SBI PMU and minimum requrired SBI version */ verify_sbi_requirement_assert(); @@ -496,11 +533,15 @@ static void test_pmu_events_overflow(void) * Qemu supports overflow for cycle/instruction. * This test may fail on any platform that do not support overflow for these two events. */ - test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1); + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); - test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); - GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2); + vcpu_shared_irq_count = 0; + + for (i = 0; i < targs.overflow_irqnum; i++) + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum); GUEST_DONE(); } @@ -569,7 +610,7 @@ static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu) virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1); snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE); - snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva); + snapshot_gpa = addr_gva2gpa(vcpu->vm, (gva_t)snapshot_gva); sync_global_to_guest(vcpu->vm, snapshot_gva); sync_global_to_guest(vcpu->vm, snapshot_gpa); } @@ -609,7 +650,11 @@ static void test_vm_events_overflow(void *guest_code) vcpu_init_vector_tables(vcpu); /* Initialize guest timer frequency. */ timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency)); + + /* Export the shared variables to the guest */ sync_global_to_guest(vm, timer_freq); + sync_global_to_guest(vm, vcpu_shared_irq_count); + sync_global_to_guest(vm, targs); run_vcpu(vcpu); @@ -618,28 +663,38 @@ static void test_vm_events_overflow(void *guest_code) static void test_print_help(char *name) { - pr_info("Usage: %s [-h] [-d <test name>]\n", name); - pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n", + name); + pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n", + SBI_PMU_OVERFLOW_IRQNUM_DEFAULT); pr_info("\t-h: print this help screen\n"); } static bool parse_args(int argc, char *argv[]) { int opt; + int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT | + SBI_PMU_TEST_OVERFLOW; + int overflow_interrupts = 0; - while ((opt = getopt(argc, argv, "hd:")) != -1) { + while ((opt = getopt(argc, argv, "ht:n:")) != -1) { switch (opt) { - case 'd': + case 't': if (!strncmp("basic", optarg, 5)) - disabled_tests |= SBI_PMU_TEST_BASIC; + temp_disabled_tests &= ~SBI_PMU_TEST_BASIC; else if (!strncmp("events", optarg, 6)) - disabled_tests |= SBI_PMU_TEST_EVENTS; + temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS; else if (!strncmp("snapshot", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_SNAPSHOT; + temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT; else if (!strncmp("overflow", optarg, 8)) - disabled_tests |= SBI_PMU_TEST_OVERFLOW; + temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW; else goto done; + targs.disabled_tests = temp_disabled_tests; + break; + case 'n': + overflow_interrupts = atoi_positive("Number of LCOFI", optarg); break; case 'h': default: @@ -647,6 +702,15 @@ static bool parse_args(int argc, char *argv[]) } } + if (overflow_interrupts > 0) { + if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) { + pr_info("-n option is only available for overflow test\n"); + goto done; + } else { + targs.overflow_irqnum = overflow_interrupts; + } + } + return true; done: test_print_help(argv[0]); @@ -655,25 +719,28 @@ done: int main(int argc, char *argv[]) { + targs.disabled_tests = 0; + targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT; + if (!parse_args(argc, argv)) exit(KSFT_SKIP); - if (!(disabled_tests & SBI_PMU_TEST_BASIC)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) { test_vm_basic_test(test_pmu_basic_sanity); pr_info("SBI PMU basic test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) { test_vm_events_test(test_pmu_events); pr_info("SBI PMU event verification test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { test_vm_events_snapshot_test(test_pmu_events_snaphost); pr_info("SBI PMU event verification with snapshot test : PASS\n"); } - if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) { test_vm_events_overflow(test_pmu_events_overflow); pr_info("SBI PMU event verification with overflow test : PASS\n"); } diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab..f80ad6b47d16 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,26 +196,29 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; break; + case 'l': + latency = atoi_paranoid(optarg); + break; case 'h': default: help(argv[0]); @@ -243,6 +246,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +295,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +313,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index 85cc8c18d6e7..e39a724fe860 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm) slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); - kvm_arch_vm_post_create(vm); + kvm_arch_vm_post_create(vm, 0); } static struct kvm_vm *create_vm_two_memslots(void) diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..aded795d42be 100644 --- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); free(array); } else { - ksft_test_result_skip("%s feature is not avaialable\n", + ksft_test_result_skip("%s feature is not available\n", testlist[idx].subfunc_name); } } diff --git a/tools/testing/selftests/kvm/s390/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c index ad8095968601..751c61c0f056 100644 --- a/tools/testing/selftests/kvm/s390/debug_test.c +++ b/tools/testing/selftests/kvm/s390/debug_test.c @@ -17,7 +17,7 @@ asm("int_handler:\n" "j .\n"); static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, - size_t new_psw_off, uint64_t *new_psw) + size_t new_psw_off, u64 *new_psw) { struct kvm_guest_debug debug = {}; struct kvm_regs regs; @@ -27,7 +27,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, vm = vm_create_with_one_vcpu(vcpu, guest_code); lowcore = addr_gpa2hva(vm, 0); new_psw[0] = (*vcpu)->run->psw_mask; - new_psw[1] = (uint64_t)int_handler; + new_psw[1] = (u64)int_handler; memcpy(lowcore + new_psw_off, new_psw, 16); vcpu_regs_get(*vcpu, ®s); regs.gprs[2] = -1; @@ -42,7 +42,7 @@ static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, static void test_step_int(void *guest_code, size_t new_psw_off) { struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; + u64 new_psw[2]; struct kvm_vm *vm; vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); @@ -79,7 +79,7 @@ static void test_step_pgm_diag(void) .u.pgm.code = PGM_SPECIFICATION, }; struct kvm_vcpu *vcpu; - uint64_t new_psw[2]; + u64 new_psw[2]; struct kvm_vm *vm; vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, diff --git a/tools/testing/selftests/kvm/s390/irq_routing.c b/tools/testing/selftests/kvm/s390/irq_routing.c new file mode 100644 index 000000000000..f3839284ac08 --- /dev/null +++ b/tools/testing/selftests/kvm/s390/irq_routing.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * IRQ routing offset tests. + * + * Copyright IBM Corp. 2026 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +extern char guest_code[]; +asm("guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test(void) +{ + struct kvm_irq_routing *routing; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + gpa_t mem; + int ret; + + struct kvm_irq_routing_entry ue = { + .type = KVM_IRQ_ROUTING_S390_ADAPTER, + .gsi = 1, + }; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + mem = vm_phy_pages_alloc(vm, 2, 4096 * 42, 0); + + routing = kvm_gsi_routing_create(); + routing->nr = 1; + routing->entries[0] = ue; + routing->entries[0].u.adapter.summary_addr = (uintptr_t)mem; + routing->entries[0].u.adapter.ind_addr = (uintptr_t)mem; + + routing->entries[0].u.adapter.summary_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "summary offset outside of page\n"); + + routing->entries[0].u.adapter.summary_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "summary offset inside of page\n"); + + routing->entries[0].u.adapter.ind_offset = 4096 * 8; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == -1 && errno == EINVAL, "ind offset outside of page\n"); + + routing->entries[0].u.adapter.ind_offset -= 4; + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); + ksft_test_result(ret == 0, "ind offset inside of page\n"); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_IRQ_ROUTING)); + + ksft_print_header(); + ksft_set_plan(4); + test(); + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/keyop.c b/tools/testing/selftests/kvm/s390/keyop.c new file mode 100644 index 000000000000..c7805e87d12c --- /dev/null +++ b/tools/testing/selftests/kvm/s390/keyop.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x KVM_S390_KEYOP + * + * Copyright IBM Corp. 2026 + * + * Authors: + * Claudio Imbrenda <imbrenda@linux.ibm.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/bits.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "processor.h" + +#define BUF_PAGES 128UL +#define GUEST_PAGES 256UL + +#define BUF_START_GFN (GUEST_PAGES - BUF_PAGES) +#define BUF_START_ADDR (BUF_START_GFN << PAGE_SHIFT) + +#define KEY_BITS_ACC 0xf0 +#define KEY_BIT_F 0x08 +#define KEY_BIT_R 0x04 +#define KEY_BIT_C 0x02 + +#define KEY_BITS_RC (KEY_BIT_R | KEY_BIT_C) +#define KEY_BITS_ALL (KEY_BITS_ACC | KEY_BIT_F | KEY_BITS_RC) + +static unsigned char tmp[BUF_PAGES]; +static unsigned char old[BUF_PAGES]; +static unsigned char expected[BUF_PAGES]; + +static int _get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[]) +{ + struct kvm_s390_skeys skeys_ioctl = { + .start_gfn = BUF_START_GFN, + .count = BUF_PAGES, + .skeydata_addr = (unsigned long)skeys, + }; + + return __vm_ioctl(vcpu->vm, KVM_S390_GET_SKEYS, &skeys_ioctl); +} + +static void get_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[]) +{ + int r = _get_skeys(vcpu, skeys); + + TEST_ASSERT(!r, "Failed to get storage keys, r=%d", r); +} + +static void set_skeys(struct kvm_vcpu *vcpu, unsigned char skeys[]) +{ + struct kvm_s390_skeys skeys_ioctl = { + .start_gfn = BUF_START_GFN, + .count = BUF_PAGES, + .skeydata_addr = (unsigned long)skeys, + }; + int r; + + r = __vm_ioctl(vcpu->vm, KVM_S390_SET_SKEYS, &skeys_ioctl); + TEST_ASSERT(!r, "Failed to set storage keys, r=%d", r); +} + +static int do_keyop(struct kvm_vcpu *vcpu, int op, unsigned long page_idx, unsigned char skey) +{ + struct kvm_s390_keyop keyop = { + .guest_addr = BUF_START_ADDR + page_idx * PAGE_SIZE, + .key = skey, + .operation = op, + }; + int r; + + r = __vm_ioctl(vcpu->vm, KVM_S390_KEYOP, &keyop); + TEST_ASSERT(!r, "Failed to perform keyop, r=%d", r); + TEST_ASSERT((keyop.key & 1) == 0, + "Last bit of key is 1, should be 0! page %lu, new key=%#x, old key=%#x", + page_idx, skey, keyop.key); + + return keyop.key; +} + +static void fault_in_buffer(struct kvm_vcpu *vcpu, int where, int cur_loc) +{ + unsigned long i; + int r; + + if (where != cur_loc) + return; + + for (i = 0; i < BUF_PAGES; i++) { + r = ioctl(vcpu->fd, KVM_S390_VCPU_FAULT, BUF_START_ADDR + i * PAGE_SIZE); + TEST_ASSERT(!r, "Faulting in buffer page %lu, r=%d", i, r); + } +} + +static inline void set_pattern(unsigned char skeys[]) +{ + int i; + + for (i = 0; i < BUF_PAGES; i++) + skeys[i] = i << 1; +} + +static void dump_sk(const unsigned char skeys[], const char *descr) +{ + int i, j; + + fprintf(stderr, "# %s:\n", descr); + for (i = 0; i < BUF_PAGES; i += 32) { + fprintf(stderr, "# %3d: ", i); + for (j = 0; j < 32; j++) + fprintf(stderr, "%02x ", skeys[i + j]); + fprintf(stderr, "\n"); + } +} + +static inline void compare(const unsigned char what[], const unsigned char expected[], + const char *descr, int fault_in_loc) +{ + int i; + + for (i = 0; i < BUF_PAGES; i++) { + if (expected[i] != what[i]) { + dump_sk(expected, "Expected"); + dump_sk(what, "Got"); + } + TEST_ASSERT(expected[i] == what[i], + "%s! fault-in location %d, page %d, expected %#x, got %#x", + descr, fault_in_loc, i, expected[i], what[i]); + } +} + +static inline void clear_all(void) +{ + memset(tmp, 0, BUF_PAGES); + memset(old, 0, BUF_PAGES); + memset(expected, 0, BUF_PAGES); +} + +static void test_init(struct kvm_vcpu *vcpu, int fault_in) +{ + /* Set all storage keys to zero */ + fault_in_buffer(vcpu, fault_in, 1); + set_skeys(vcpu, expected); + + fault_in_buffer(vcpu, fault_in, 2); + get_skeys(vcpu, tmp); + compare(tmp, expected, "Setting keys not zero", fault_in); + + /* Set storage keys to a sequential pattern */ + fault_in_buffer(vcpu, fault_in, 3); + set_pattern(expected); + set_skeys(vcpu, expected); + + fault_in_buffer(vcpu, fault_in, 4); + get_skeys(vcpu, tmp); + compare(tmp, expected, "Setting storage keys failed", fault_in); +} + +static void test_rrbe(struct kvm_vcpu *vcpu, int fault_in) +{ + unsigned char k; + int i; + + /* Set storage keys to a sequential pattern */ + fault_in_buffer(vcpu, fault_in, 1); + set_pattern(expected); + set_skeys(vcpu, expected); + + /* Call the RRBE KEYOP ioctl on each page and verify the result */ + fault_in_buffer(vcpu, fault_in, 2); + for (i = 0; i < BUF_PAGES; i++) { + k = do_keyop(vcpu, KVM_S390_KEYOP_RRBE, i, 0xff); + TEST_ASSERT((expected[i] & KEY_BITS_RC) == k, + "Old R or C value mismatch! expected: %#x, got %#x", + expected[i] & KEY_BITS_RC, k); + if (i == BUF_PAGES / 2) + fault_in_buffer(vcpu, fault_in, 3); + } + + for (i = 0; i < BUF_PAGES; i++) + expected[i] &= ~KEY_BIT_R; + + /* Verify that only the R bit has been cleared */ + fault_in_buffer(vcpu, fault_in, 4); + get_skeys(vcpu, tmp); + compare(tmp, expected, "New value mismatch", fault_in); +} + +static void test_iske(struct kvm_vcpu *vcpu, int fault_in) +{ + int i; + + /* Set storage keys to a sequential pattern */ + fault_in_buffer(vcpu, fault_in, 1); + set_pattern(expected); + set_skeys(vcpu, expected); + + /* Call the ISKE KEYOP ioctl on each page and verify the result */ + fault_in_buffer(vcpu, fault_in, 2); + for (i = 0; i < BUF_PAGES; i++) { + tmp[i] = do_keyop(vcpu, KVM_S390_KEYOP_ISKE, i, 0xff); + if (i == BUF_PAGES / 2) + fault_in_buffer(vcpu, fault_in, 3); + } + compare(tmp, expected, "Old value mismatch", fault_in); + + /* Check storage keys have not changed */ + fault_in_buffer(vcpu, fault_in, 4); + get_skeys(vcpu, tmp); + compare(tmp, expected, "Storage keys values changed", fault_in); +} + +static void test_sske(struct kvm_vcpu *vcpu, int fault_in) +{ + int i; + + /* Set storage keys to a sequential pattern */ + fault_in_buffer(vcpu, fault_in, 1); + set_pattern(tmp); + set_skeys(vcpu, tmp); + + /* Call the SSKE KEYOP ioctl on each page and verify the result */ + fault_in_buffer(vcpu, fault_in, 2); + for (i = 0; i < BUF_PAGES; i++) { + expected[i] = ~tmp[i] & KEY_BITS_ALL; + /* Set the new storage keys to be the bit-inversion of the previous ones */ + old[i] = do_keyop(vcpu, KVM_S390_KEYOP_SSKE, i, expected[i] | 1); + if (i == BUF_PAGES / 2) + fault_in_buffer(vcpu, fault_in, 3); + } + compare(old, tmp, "Old value mismatch", fault_in); + + /* Verify that the storage keys have been set correctly */ + fault_in_buffer(vcpu, fault_in, 4); + get_skeys(vcpu, tmp); + compare(tmp, expected, "New value mismatch", fault_in); +} + +static struct testdef { + const char *name; + void (*test)(struct kvm_vcpu *vcpu, int fault_in_location); + int n_fault_in_locations; +} testplan[] = { + { "Initialization", test_init, 5 }, + { "RRBE", test_rrbe, 5 }, + { "ISKE", test_iske, 5 }, + { "SSKE", test_sske, 5 }, +}; + +static void run_test(void (*the_test)(struct kvm_vcpu *, int), int fault_in_location) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int r; + + vm = vm_create_barebones(); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, GUEST_PAGES, 0); + vcpu = __vm_vcpu_add(vm, 0); + + r = _get_skeys(vcpu, tmp); + TEST_ASSERT(r == KVM_S390_GET_SKEYS_NONE, + "Storage keys are not disabled initially, r=%d", r); + + clear_all(); + + the_test(vcpu, fault_in_location); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + int i, f; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_KEYOP)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); + + ksft_print_header(); + for (i = 0, f = 0; i < ARRAY_SIZE(testplan); i++) + f += testplan[i].n_fault_in_locations; + ksft_set_plan(f); + + for (i = 0; i < ARRAY_SIZE(testplan); i++) { + for (f = 0; f < testplan[i].n_fault_in_locations; f++) { + run_test(testplan[i].test, f); + ksft_test_result_pass("%s (fault-in location %d)\n", testplan[i].name, f); + } + } + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390/memop.c b/tools/testing/selftests/kvm/s390/memop.c index 4374b4cd2a80..0244848621b3 100644 --- a/tools/testing/selftests/kvm/s390/memop.c +++ b/tools/testing/selftests/kvm/s390/memop.c @@ -34,7 +34,7 @@ enum mop_access_mode { struct mop_desc { uintptr_t gaddr; uintptr_t gaddr_v; - uint64_t set_flags; + u64 set_flags; unsigned int f_check : 1; unsigned int f_inject : 1; unsigned int f_key : 1; @@ -42,19 +42,19 @@ struct mop_desc { unsigned int _set_flags : 1; unsigned int _sida_offset : 1; unsigned int _ar : 1; - uint32_t size; + u32 size; enum mop_target target; enum mop_access_mode mode; void *buf; - uint32_t sida_offset; + u32 sida_offset; void *old; - uint8_t old_value[16]; + u8 old_value[16]; bool *cmpxchg_success; - uint8_t ar; - uint8_t key; + u8 ar; + u8 key; }; -const uint8_t NO_KEY = 0xff; +const u8 NO_KEY = 0xff; static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) { @@ -85,7 +85,7 @@ static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; if (desc->mode == CMPXCHG) { ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG; - ksmo.old_addr = (uint64_t)desc->old; + ksmo.old_addr = (u64)desc->old; memcpy(desc->old_value, desc->old, desc->size); } break; @@ -230,8 +230,8 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -static uint8_t __aligned(PAGE_SIZE) mem1[65536]; -static uint8_t __aligned(PAGE_SIZE) mem2[65536]; +static u8 __aligned(PAGE_SIZE) mem1[65536]; +static u8 __aligned(PAGE_SIZE) mem2[65536]; struct test_default { struct kvm_vm *kvm_vm; @@ -296,7 +296,7 @@ static void prepare_mem12(void) TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) + enum mop_target mop_target, u32 size, u8 key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, @@ -308,7 +308,7 @@ static void default_write_read(struct test_info copy_cpu, struct test_info mop_c } static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, - enum mop_target mop_target, uint32_t size, uint8_t key) + enum mop_target mop_target, u32 size, u8 key) { prepare_mem12(); CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); @@ -318,12 +318,12 @@ static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, ASSERT_MEM_EQ(mem1, mem2, size); } -static void default_cmpxchg(struct test_default *test, uint8_t key) +static void default_cmpxchg(struct test_default *test, u8 key) { for (int size = 1; size <= 16; size *= 2) { for (int offset = 0; offset < 16; offset += size) { - uint8_t __aligned(16) new[16] = {}; - uint8_t __aligned(16) old[16]; + u8 __aligned(16) new[16] = {}; + u8 __aligned(16) old[16]; bool succ; prepare_mem12(); @@ -400,7 +400,7 @@ static void test_copy_access_register(void) kvm_vm_free(t.kvm_vm); } -static void set_storage_key_range(void *addr, size_t len, uint8_t key) +static void set_storage_key_range(void *addr, size_t len, u8 key) { uintptr_t _addr, abs, i; int not_mapped = 0; @@ -483,13 +483,13 @@ static __uint128_t cut_to_size(int size, __uint128_t val) { switch (size) { case 1: - return (uint8_t)val; + return (u8)val; case 2: - return (uint16_t)val; + return (u16)val; case 4: - return (uint32_t)val; + return (u32)val; case 8: - return (uint64_t)val; + return (u64)val; case 16: return val; } @@ -501,10 +501,10 @@ static bool popcount_eq(__uint128_t a, __uint128_t b) { unsigned int count_a, count_b; - count_a = __builtin_popcountl((uint64_t)(a >> 64)) + - __builtin_popcountl((uint64_t)a); - count_b = __builtin_popcountl((uint64_t)(b >> 64)) + - __builtin_popcountl((uint64_t)b); + count_a = __builtin_popcountl((u64)(a >> 64)) + + __builtin_popcountl((u64)a); + count_b = __builtin_popcountl((u64)(b >> 64)) + + __builtin_popcountl((u64)b); return count_a == count_b; } @@ -553,7 +553,7 @@ static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old) if (swap) { int i, j; __uint128_t new; - uint8_t byte0, byte1; + u8 byte0, byte1; rand = rand * 3 + 1; i = rand % size; @@ -585,28 +585,28 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t switch (size) { case 4: { - uint32_t old = *old_addr; + u32 old = *old_addr; asm volatile ("cs %[old],%[new],%[address]" : [old] "+d" (old), - [address] "+Q" (*(uint32_t *)(target)) - : [new] "d" ((uint32_t)new) + [address] "+Q" (*(u32 *)(target)) + : [new] "d" ((u32)new) : "cc" ); - ret = old == (uint32_t)*old_addr; + ret = old == (u32)*old_addr; *old_addr = old; return ret; } case 8: { - uint64_t old = *old_addr; + u64 old = *old_addr; asm volatile ("csg %[old],%[new],%[address]" : [old] "+d" (old), - [address] "+Q" (*(uint64_t *)(target)) - : [new] "d" ((uint64_t)new) + [address] "+Q" (*(u64 *)(target)) + : [new] "d" ((u64)new) : "cc" ); - ret = old == (uint64_t)*old_addr; + ret = old == (u64)*old_addr; *old_addr = old; return ret; } @@ -811,10 +811,10 @@ static void test_errors_cmpxchg_key(void) static void test_termination(void) { struct test_default t = test_default_init(guest_error_key); - uint64_t prefix; - uint64_t teid; - uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); - uint64_t psw[2]; + u64 prefix; + u64 teid; + u64 teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61); + u64 psw[2]; HOST_SYNC(t.vcpu, STAGE_INITED); HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); @@ -855,7 +855,7 @@ static void test_errors_key_storage_prot_override(void) kvm_vm_free(t.kvm_vm); } -const uint64_t last_page_addr = -PAGE_SIZE; +const u64 last_page_addr = -PAGE_SIZE; static void guest_copy_key_fetch_prot_override(void) { @@ -878,10 +878,10 @@ static void guest_copy_key_fetch_prot_override(void) static void test_copy_key_fetch_prot_override(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; @@ -917,10 +917,10 @@ out: static void test_errors_key_fetch_prot_override_not_enabled(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; @@ -938,10 +938,10 @@ out: static void test_errors_key_fetch_prot_override_enabled(void) { struct test_default t = test_default_init(guest_copy_key_fetch_prot_override); - vm_vaddr_t guest_0_page, guest_last_page; + gva_t guest_0_page, guest_last_page; - guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0); - guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); + guest_0_page = vm_alloc(t.kvm_vm, PAGE_SIZE, 0); + guest_last_page = vm_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr); if (guest_0_page != 0 || guest_last_page != last_page_addr) { print_skip("did not allocate guest pages at required positions"); goto out; diff --git a/tools/testing/selftests/kvm/s390/resets.c b/tools/testing/selftests/kvm/s390/resets.c index b58f75b381e5..e3c7a2f148f9 100644 --- a/tools/testing/selftests/kvm/s390/resets.c +++ b/tools/testing/selftests/kvm/s390/resets.c @@ -20,7 +20,7 @@ struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS]; -static uint8_t regs_null[512]; +static u8 regs_null[512]; static void guest_code_initial(void) { @@ -57,9 +57,9 @@ static void guest_code_initial(void) ); } -static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) +static void test_one_reg(struct kvm_vcpu *vcpu, u64 id, u64 value) { - uint64_t eval_reg; + u64 eval_reg; eval_reg = vcpu_get_reg(vcpu, id); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); diff --git a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c index bba0d9a6dcc8..478381e6f84e 100644 --- a/tools/testing/selftests/kvm/s390/shared_zeropage_test.c +++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c @@ -4,16 +4,15 @@ * * Copyright (C) 2024, Red Hat, Inc. */ -#include <sys/mman.h> - #include <linux/fs.h> #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" -static void set_storage_key(void *addr, uint8_t skey) +static void set_storage_key(void *addr, u8 skey) { asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } diff --git a/tools/testing/selftests/kvm/s390/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c index 12d5e1cb62e3..d86179827a18 100644 --- a/tools/testing/selftests/kvm/s390/tprot.c +++ b/tools/testing/selftests/kvm/s390/tprot.c @@ -4,8 +4,8 @@ * * Copyright IBM Corp. 2021 */ -#include <sys/mman.h> #include "test_util.h" +#include "kvm_syscalls.h" #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" @@ -14,12 +14,12 @@ #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; -static uint8_t *const page_store_prot = pages[0]; -static uint8_t *const page_fetch_prot = pages[1]; +static __aligned(PAGE_SIZE) u8 pages[2][PAGE_SIZE]; +static u8 *const page_store_prot = pages[0]; +static u8 *const page_fetch_prot = pages[1]; /* Nonzero return value indicates that address not mapped */ -static int set_storage_key(void *addr, uint8_t key) +static int set_storage_key(void *addr, u8 key) { int not_mapped = 0; @@ -44,9 +44,9 @@ enum permission { TRANSL_UNAVAIL = 3, }; -static enum permission test_protection(void *addr, uint8_t key) +static enum permission test_protection(void *addr, u8 key) { - uint64_t mask; + u64 mask; asm volatile ( "tprot %[addr], 0(%[key])\n" @@ -72,7 +72,7 @@ enum stage { struct test { enum stage stage; void *addr; - uint8_t key; + u8 key; enum permission expected; } tests[] = { /* @@ -146,7 +146,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) /* * Some fetch protection override tests require that page 0 * be mapped, however, when the hosts tries to map that page via - * vm_vaddr_alloc, it may happen that some other page gets mapped + * vm_alloc, it may happen that some other page gets mapped * instead. * In order to skip these tests we detect this inside the guest */ @@ -207,7 +207,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; - vm_vaddr_t guest_0_page; + gva_t guest_0_page; ksft_print_header(); ksft_set_plan(STAGE_END); @@ -216,10 +216,10 @@ int main(int argc, char *argv[]) run = vcpu->run; HOST_SYNC(vcpu, STAGE_INIT_SIMPLE); - mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); + mprotect(addr_gva2hva(vm, (gva_t)pages), PAGE_SIZE * 2, PROT_READ); HOST_SYNC(vcpu, TEST_SIMPLE); - guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); + guest_0_page = vm_alloc(vm, PAGE_SIZE, 0); if (guest_0_page != 0) { /* Use NO_TAP so we don't get a PASS print */ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); @@ -229,7 +229,7 @@ int main(int argc, char *argv[]) HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); } if (guest_0_page == 0) - mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); + mprotect(addr_gva2hva(vm, (gva_t)0), PAGE_SIZE, PROT_READ); run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; run->kvm_dirty_regs = KVM_SYNC_CRS; HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE); diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index d265b34c54be..b8c6f37b53e0 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -111,7 +111,7 @@ FIXTURE(uc_kvm) uintptr_t base_hva; uintptr_t code_hva; int kvm_run_size; - vm_paddr_t pgd; + gpa_t pgd; void *vm_mem; int vcpu_fd; int kvm_fd; @@ -142,19 +142,17 @@ FIXTURE_SETUP(uc_kvm) self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); - self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, - PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); - ASSERT_NE(self->run, MAP_FAILED); + self->run = kvm_mmap(self->kvm_run_size, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd); /** * For virtual cpus that have been created with S390 user controlled * virtual machines, the resulting vcpu fd can be memory mapped at page * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of * the virtual cpu's hardware control block. */ - self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, - PROT_READ | PROT_WRITE, MAP_SHARED, - self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); - ASSERT_NE(self->sie_block, MAP_FAILED); + self->sie_block = __kvm_mmap(PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, self->vcpu_fd, + KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); TH_LOG("VM created %p %p", self->run, self->sie_block); @@ -186,8 +184,8 @@ FIXTURE_SETUP(uc_kvm) FIXTURE_TEARDOWN(uc_kvm) { - munmap(self->sie_block, PAGE_SIZE); - munmap(self->run, self->kvm_run_size); + kvm_munmap(self->sie_block, PAGE_SIZE); + kvm_munmap(self->run, self->kvm_run_size); close(self->vcpu_fd); close(self->vm_fd); close(self->kvm_fd); @@ -271,7 +269,7 @@ TEST(uc_cap_hpage) } /* calculate host virtual addr from guest physical addr */ -static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, gpa_t gpa) { return (void *)(self->base_hva - self->base_gpa + gpa); } @@ -573,7 +571,7 @@ TEST_F(uc_kvm, uc_skey) { struct kvm_s390_sie_block *sie_block = self->sie_block; struct kvm_sync_regs *sync_regs = &self->run->s.regs; - u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + u64 test_gva = VM_MEM_SIZE - (SZ_1M / 2); struct kvm_run *run = self->run; const u8 skeyvalue = 0x34; @@ -585,7 +583,7 @@ TEST_F(uc_kvm, uc_skey) /* set register content for test_skey_asm to access not mapped memory */ sync_regs->gprs[1] = skeyvalue; sync_regs->gprs[5] = self->base_gpa; - sync_regs->gprs[6] = test_vaddr; + sync_regs->gprs[6] = test_gva; run->kvm_dirty_regs |= KVM_SYNC_GPRS; /* DAT disabled + 64 bit mode */ diff --git a/tools/testing/selftests/kvm/s390/user_operexec.c b/tools/testing/selftests/kvm/s390/user_operexec.c new file mode 100644 index 000000000000..714906c1d12a --- /dev/null +++ b/tools/testing/selftests/kvm/s390/user_operexec.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test operation exception forwarding. + * + * Copyright IBM Corp. 2025 + * + * Authors: + * Janosch Frank <frankja@linux.ibm.com> + */ +#include "kselftest.h" +#include "kvm_util.h" +#include "test_util.h" +#include "sie.h" + +#include <linux/kvm.h> + +static void guest_code_instr0(void) +{ + asm(".word 0x0000"); +} + +static void test_user_instr0(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +static void guest_code_user_operexec(void) +{ + asm(".word 0x0807"); +} + +static void test_user_operexec(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* + * Since user_operexec is the superset it can be used for the + * 0 instruction. + */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_instr0); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0); + + kvm_vm_free(vm); +} + +/* combine user_instr0 and user_operexec */ +static void test_user_operexec_combined(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int rc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); + + /* Reverse enablement order */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code_user_operexec); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_OPEREXEC, 0); + TEST_ASSERT_EQ(0, rc); + rc = __vm_enable_cap(vm, KVM_CAP_S390_USER_INSTR0, 0); + TEST_ASSERT_EQ(0, rc); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_OPEREXC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x0807); + + kvm_vm_free(vm); +} + +/* + * Run all tests above. + * + * Enablement after VCPU has been added is automatically tested since + * we enable the capability after VCPU creation. + */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "instr0", test_user_instr0 }, + { "operexec", test_user_operexec }, + { "operexec_combined", test_user_operexec_combined}, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_USER_INSTR0)); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bc440d5aba57..e639a9db51ee 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -8,11 +8,11 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> -#include <sys/mman.h> #include <linux/compiler.h> #include <test_util.h> +#include <kvm_syscalls.h> #include <kvm_util.h> #include <processor.h> @@ -30,19 +30,19 @@ #define MEM_REGION_GPA 0xc0000000 #define MEM_REGION_SLOT 10 -static const uint64_t MMIO_VAL = 0xbeefull; +static const u64 MMIO_VAL = 0xbeefull; -extern const uint64_t final_rip_start; -extern const uint64_t final_rip_end; +extern const u64 final_rip_start; +extern const u64 final_rip_end; static sem_t vcpu_ready; -static inline uint64_t guest_spin_on_val(uint64_t spin_val) +static inline u64 guest_spin_on_val(u64 spin_val) { - uint64_t val; + u64 val; do { - val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA)); + val = READ_ONCE(*((u64 *)MEM_REGION_GPA)); } while (val == spin_val); GUEST_SYNC(0); @@ -54,7 +54,7 @@ static void *vcpu_worker(void *data) struct kvm_vcpu *vcpu = data; struct kvm_run *run = vcpu->run; struct ucall uc; - uint64_t cmd; + u64 cmd; /* * Loop until the guest is done. Re-enter the guest on all MMIO exits, @@ -111,8 +111,8 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, void *guest_code) { struct kvm_vm *vm; - uint64_t *hva; - uint64_t gpa; + u64 *hva; + gpa_t gpa; vm = vm_create_with_one_vcpu(vcpu, guest_code); @@ -144,7 +144,7 @@ static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, static void guest_code_move_memory_region(void) { - uint64_t val; + u64 val; GUEST_SYNC(0); @@ -180,7 +180,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk) pthread_t vcpu_thread; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t *hva; + u64 *hva; vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); @@ -224,7 +224,7 @@ static void test_move_memory_region(bool disable_slot_zap_quirk) static void guest_code_delete_memory_region(void) { struct desc_ptr idt; - uint64_t val; + u64 val; /* * Clobber the IDT so that a #PF due to the memory region being deleted @@ -345,12 +345,12 @@ static void test_zero_memory_regions(void) static void test_invalid_memory_region_flags(void) { - uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES; - const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD; + u32 supported_flags = KVM_MEM_LOG_DIRTY_PAGES; + const u32 v2_only_flags = KVM_MEM_GUEST_MEMFD; struct kvm_vm *vm; int r, i; -#if defined __aarch64__ || defined __riscv || defined __x86_64__ +#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__ supported_flags |= KVM_MEM_READONLY; #endif @@ -410,17 +410,10 @@ static void test_add_max_memory_regions(void) { int ret; struct kvm_vm *vm; - uint32_t max_mem_slots; - uint32_t slot; + u32 max_mem_slots; + u32 slot; void *mem, *mem_aligned, *mem_extra; - size_t alignment; - -#ifdef __s390x__ - /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ - alignment = 0x100000; -#else - alignment = 1; -#endif + size_t alignment = 1; max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); TEST_ASSERT(max_mem_slots > 0, @@ -433,31 +426,30 @@ static void test_add_max_memory_regions(void) pr_info("Adding slots 0..%i, each memory region with %dK size\n", (max_mem_slots - 1), MEM_REGION_SIZE >> 10); - mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); - TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); + + mem = kvm_mmap((size_t)max_mem_slots * MEM_REGION_SIZE + alignment, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); for (slot = 0; slot < max_mem_slots; slot++) vm_set_user_memory_region(vm, slot, 0, - ((uint64_t)slot * MEM_REGION_SIZE), + ((u64)slot * MEM_REGION_SIZE), MEM_REGION_SIZE, - mem_aligned + (uint64_t)slot * MEM_REGION_SIZE); + mem_aligned + (u64)slot * MEM_REGION_SIZE); /* Check it cannot be added memory slots beyond the limit */ - mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host"); + mem_extra = kvm_mmap(MEM_REGION_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1); ret = __vm_set_user_memory_region(vm, max_mem_slots, 0, - (uint64_t)max_mem_slots * MEM_REGION_SIZE, + (u64)max_mem_slots * MEM_REGION_SIZE, MEM_REGION_SIZE, mem_extra); TEST_ASSERT(ret == -1 && errno == EINVAL, "Adding one more memory slot should fail with EINVAL"); - munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); - munmap(mem_extra, MEM_REGION_SIZE); + kvm_munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment); + kvm_munmap(mem_extra, MEM_REGION_SIZE); kvm_vm_free(vm); } @@ -564,7 +556,7 @@ static void guest_code_mmio_during_vectoring(void) set_idt(&idt_desc); /* Generate a #GP by dereferencing a non-canonical address */ - *((uint8_t *)NONCANONICAL) = 0x1; + *((u8 *)NONCANONICAL) = 0x1; GUEST_ASSERT(0); } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index cce2520af720..76fcdd1fd3cb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -25,7 +25,7 @@ #define ST_GPA_BASE (1 << 30) static void *st_gva[NR_VCPUS]; -static uint64_t guest_stolen_time[NR_VCPUS]; +static u64 guest_stolen_time[NR_VCPUS]; #if defined(__x86_64__) @@ -42,9 +42,9 @@ static void check_status(struct kvm_steal_time *st) static void guest_code(int cpu) { struct kvm_steal_time *st = st_gva[cpu]; - uint32_t version; + u32 version; - GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((u64)st_gva[cpu] | KVM_MSR_ENABLED)); memset(st, 0, sizeof(*st)); GUEST_SYNC(0); @@ -67,22 +67,16 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME); } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { - int ret; - /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); sync_global_to_guest(vcpu->vm, st_gva[i]); - ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, - (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK); - TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); - vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); @@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) st->pad[8], st->pad[9], st->pad[10]); } +static void check_steal_time_uapi(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + int ret; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, + (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK); + TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); + + kvm_vm_free(vm); +} + #elif defined(__aarch64__) /* PV_TIME_ST must have 64-byte alignment */ @@ -109,16 +118,16 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) #define PV_TIME_ST 0xc5000021 struct st_time { - uint32_t rev; - uint32_t attr; - uint64_t st_time; + u32 rev; + u32 attr; + u64 st_time; }; -static int64_t smccc(uint32_t func, uint64_t arg) +static s64 smccc(u32 func, u64 arg) { struct arm_smccc_res res; - smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res); + do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res); return res.a0; } @@ -131,7 +140,7 @@ static void check_status(struct st_time *st) static void guest_code(int cpu) { struct st_time *st; - int64_t status; + s64 status; status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); GUEST_ASSERT_EQ(status, 0); @@ -166,36 +175,26 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { struct kvm_vm *vm = vcpu->vm; - uint64_t st_ipa; - int ret; + u64 st_ipa; struct kvm_device_attr dev = { .group = KVM_ARM_VCPU_PVTIME_CTRL, .attr = KVM_ARM_VCPU_PVTIME_IPA, - .addr = (uint64_t)&st_ipa, + .addr = (u64)&st_ipa, }; - vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); - /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); sync_global_to_guest(vm, st_gva[i]); - st_ipa = (ulong)st_gva[i] | 1; - ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); - TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); - st_ipa = (ulong)st_gva[i]; vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); - - ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); - TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); @@ -205,22 +204,54 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) ksft_print_msg(" st_time: %ld\n", st->st_time); } +static void check_steal_time_uapi(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + u64 st_ipa; + int ret; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + struct kvm_device_attr dev = { + .group = KVM_ARM_VCPU_PVTIME_CTRL, + .attr = KVM_ARM_VCPU_PVTIME_IPA, + .addr = (u64)&st_ipa, + }; + + vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, 1, 0); + virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, 1); + + st_ipa = (ulong)ST_GPA_BASE | 1; + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); + + st_ipa = (ulong)ST_GPA_BASE; + vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); + + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); + + kvm_vm_free(vm); +} + #elif defined(__riscv) /* SBI STA shmem must have 64-byte alignment */ #define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63) -static vm_paddr_t st_gpa[NR_VCPUS]; +static gpa_t st_gpa[NR_VCPUS]; struct sta_struct { - uint32_t sequence; - uint32_t flags; - uint64_t steal; - uint8_t preempted; - uint8_t pad[47]; + u32 sequence; + u32 flags; + u64 steal; + u8 preempted; + u8 pad[47]; } __packed; -static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags) +static void sta_set_shmem(gpa_t gpa, unsigned long flags) { unsigned long lo = (unsigned long)gpa; #if __riscv_xlen == 32 @@ -243,7 +274,7 @@ static void check_status(struct sta_struct *st) static void guest_code(int cpu) { struct sta_struct *st = st_gva[cpu]; - uint32_t sequence; + u32 sequence; long out_val = 0; bool probe; @@ -268,7 +299,7 @@ static void guest_code(int cpu) static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { - uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); + u64 id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); unsigned long enabled = vcpu_get_reg(vcpu, id); TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); @@ -276,16 +307,16 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu) return enabled; } -static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) { /* ST_GPA_BASE is identity mapped */ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); - st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]); + st_gpa[i] = addr_gva2gpa(vcpu->vm, (gva_t)st_gva[i]); sync_global_to_guest(vcpu->vm, st_gva[i]); sync_global_to_guest(vcpu->vm, st_gpa[i]); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) { struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); int i; @@ -301,6 +332,142 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) pr_info("\n"); } +static void check_steal_time_uapi(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_one_reg reg; + u64 shmem; + int ret; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + reg.id = KVM_REG_RISCV | + KVM_REG_SIZE_ULONG | + KVM_REG_RISCV_SBI_STATE | + KVM_REG_RISCV_SBI_STA | + KVM_REG_RISCV_SBI_STA_REG(shmem_lo); + reg.addr = (u64)&shmem; + + shmem = ST_GPA_BASE + 1; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "misaligned STA shmem returns -EINVAL"); + + shmem = ST_GPA_BASE; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + TEST_ASSERT(ret == 0, + "aligned STA shmem succeeds"); + + shmem = INVALID_GPA; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + TEST_ASSERT(ret == 0, + "all-ones for STA shmem succeeds"); + + kvm_vm_free(vm); +} + +#elif defined(__loongarch__) + +/* steal_time must have 64-byte alignment */ +#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63) +#define KVM_STEAL_PHYS_VALID BIT_ULL(0) + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u8 preempted; + __u8 pad[47]; +}; + +static void check_status(struct kvm_steal_time *st) +{ + GUEST_ASSERT(!(READ_ONCE(st->version) & 1)); + GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0); +} + +static void guest_code(int cpu) +{ + u32 version; + struct kvm_steal_time *st = st_gva[cpu]; + + memset(st, 0, sizeof(*st)); + GUEST_SYNC(0); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + version = READ_ONCE(st->version); + check_status(st); + GUEST_SYNC(1); + + check_status(st); + GUEST_ASSERT(version < READ_ONCE(st->version)); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + check_status(st); + GUEST_DONE(); +} + +static bool is_steal_time_supported(struct kvm_vcpu *vcpu) +{ + int err; + u64 val; + struct kvm_device_attr attr = { + .group = KVM_LOONGARCH_VCPU_CPUCFG, + .attr = CPUCFG_KVM_FEATURE, + .addr = (u64)&val, + }; + + err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr); + if (err) + return false; + + err = __vcpu_ioctl(vcpu, KVM_GET_DEVICE_ATTR, &attr); + if (err) + return false; + + return val & BIT(KVM_FEATURE_STEAL_TIME); +} + +static void steal_time_init(struct kvm_vcpu *vcpu, u32 i) +{ + int err; + u64 st_gpa; + struct kvm_vm *vm = vcpu->vm; + struct kvm_device_attr attr = { + .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, + .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, + .addr = (u64)&st_gpa, + }; + + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + sync_global_to_guest(vm, st_gva[i]); + + err = __vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &attr); + TEST_ASSERT(err == 0, "No PV stealtime Feature"); + + st_gpa = (unsigned long)st_gva[i] | KVM_STEAL_PHYS_VALID; + err = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &attr); + TEST_ASSERT(err == 0, "Fail to set PV stealtime GPA"); +} + +static void steal_time_dump(struct kvm_vm *vm, u32 vcpu_idx) +{ + struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); + + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" steal: %lld\n", st->steal); + ksft_print_msg(" flags: %d\n", st->flags); + ksft_print_msg(" version: %d\n", st->version); + ksft_print_msg(" preempted: %d\n", st->preempted); +} + +static void check_steal_time_uapi(void) +{ + +} #endif static void *do_steal_time(void *arg) @@ -369,6 +536,8 @@ int main(int ac, char **av) TEST_REQUIRE(is_steal_time_supported(vcpus[0])); ksft_set_plan(NR_VCPUS); + check_steal_time_uapi(); + /* Run test on each VCPU */ for (i = 0; i < NR_VCPUS; ++i) { steal_time_init(vcpus[i], i); diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 513d421a9bff..dc5e30b7b77f 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -17,7 +17,7 @@ #ifdef __x86_64__ struct test_case { - uint64_t tsc_offset; + u64 tsc_offset; }; static struct test_case test_cases[] = { @@ -39,12 +39,12 @@ static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test) &test->tsc_offset); } -static uint64_t guest_read_system_counter(struct test_case *test) +static u64 guest_read_system_counter(struct test_case *test) { return rdtsc(); } -static uint64_t host_read_guest_system_counter(struct test_case *test) +static u64 host_read_guest_system_counter(struct test_case *test) { return rdtsc() + test->tsc_offset; } @@ -69,9 +69,9 @@ static void guest_main(void) } } -static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end) +static void handle_sync(struct ucall *uc, u64 start, u64 end) { - uint64_t obs = uc->args[2]; + u64 obs = uc->args[2]; TEST_ASSERT(start <= obs && obs <= end, "unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]", @@ -88,7 +88,7 @@ static void handle_abort(struct ucall *uc) static void enter_guest(struct kvm_vcpu *vcpu) { - uint64_t start, end; + u64 start, end; struct ucall uc; int i; diff --git a/tools/testing/selftests/kvm/x86/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c index f4ce5a185a7d..4e63da2b1889 100644 --- a/tools/testing/selftests/kvm/x86/amx_test.c +++ b/tools/testing/selftests/kvm/x86/amx_test.c @@ -69,15 +69,21 @@ static inline void __tileloadd(void *tile) : : "a"(tile), "d"(0)); } +static inline int tileloadd_safe(void *tile) +{ + return kvm_asm_safe(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10", + "a"(tile), "d"(0)); +} + static inline void __tilerelease(void) { asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::); } -static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) +static inline void __xsavec(struct xstate *xstate, u64 rfbm) { - uint32_t rfbm_lo = rfbm; - uint32_t rfbm_hi = rfbm >> 32; + u32 rfbm_lo = rfbm; + u32 rfbm_hi = rfbm >> 32; asm volatile("xsavec (%%rdi)" : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi) @@ -124,27 +130,52 @@ static void set_tilecfg(struct tile_config *cfg) } } +enum { + /* Retrieve TMM0 from guest, stash it for TEST_RESTORE_TILEDATA */ + TEST_SAVE_TILEDATA = 1, + + /* Check TMM0 against tiledata */ + TEST_COMPARE_TILEDATA = 2, + + /* Restore TMM0 from earlier save */ + TEST_RESTORE_TILEDATA = 4, + + /* Full VM save/restore */ + TEST_SAVE_RESTORE = 8, +}; + static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { + int vector; + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); - GUEST_SYNC(1); + GUEST_SYNC(TEST_SAVE_RESTORE); /* xfd=0, enable amx */ wrmsr(MSR_IA32_XFD, 0); - GUEST_SYNC(2); + GUEST_SYNC(TEST_SAVE_RESTORE); GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0); set_tilecfg(amx_cfg); __ldtilecfg(amx_cfg); - GUEST_SYNC(3); + GUEST_SYNC(TEST_SAVE_RESTORE); /* Check save/restore when trap to userspace */ __tileloadd(tiledata); - GUEST_SYNC(4); + GUEST_SYNC(TEST_SAVE_TILEDATA | TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE); + + /* xfd=0x40000, disable amx tiledata */ + wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA); + + /* host tries setting tiledata while guest XFD is set */ + GUEST_SYNC(TEST_RESTORE_TILEDATA); + GUEST_SYNC(TEST_SAVE_RESTORE); + + wrmsr(MSR_IA32_XFD, 0); __tilerelease(); - GUEST_SYNC(5); + GUEST_SYNC(TEST_SAVE_RESTORE); /* * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in * the xcomp_bv. @@ -154,6 +185,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA); + /* #NM test */ + /* xfd=0x40000, disable amx tiledata */ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA); @@ -166,32 +199,33 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA)); GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA)); - GUEST_SYNC(6); + GUEST_SYNC(TEST_SAVE_RESTORE); GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); set_tilecfg(amx_cfg); __ldtilecfg(amx_cfg); - /* Trigger #NM exception */ - __tileloadd(tiledata); - GUEST_SYNC(10); - GUEST_DONE(); -} + /* Trigger #NM exception */ + vector = tileloadd_safe(tiledata); + __GUEST_ASSERT(vector == NM_VECTOR, + "Wanted #NM on tileloadd with XFD[18]=1, got %s", + ex_str(vector)); -void guest_nm_handler(struct ex_regs *regs) -{ - /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */ - GUEST_SYNC(7); GUEST_ASSERT(!(get_cr0() & X86_CR0_TS)); GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); - GUEST_SYNC(8); + GUEST_SYNC(TEST_SAVE_RESTORE); GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA); GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA); /* Clear xfd_err */ wrmsr(MSR_IA32_XFD_ERR, 0); /* xfd=0, enable amx */ wrmsr(MSR_IA32_XFD, 0); - GUEST_SYNC(9); + GUEST_SYNC(TEST_SAVE_RESTORE); + + __tileloadd(tiledata); + GUEST_SYNC(TEST_COMPARE_TILEDATA | TEST_SAVE_RESTORE); + + GUEST_DONE(); } int main(int argc, char *argv[]) @@ -200,10 +234,10 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_x86_state *state; + struct kvm_x86_state *tile_state = NULL; int xsave_restore_size; - vm_vaddr_t amx_cfg, tiledata, xstate; + gva_t amx_cfg, tiledata, xstate; struct ucall uc; - u32 amx_offset; int ret; /* @@ -228,22 +262,20 @@ int main(int argc, char *argv[]) vcpu_regs_get(vcpu, ®s1); - /* Register #NM handler */ - vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); - /* amx cfg for guest_code */ - amx_cfg = vm_vaddr_alloc_page(vm); + amx_cfg = vm_alloc_page(vm); memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize()); /* amx tiledata for guest_code */ - tiledata = vm_vaddr_alloc_pages(vm, 2); + tiledata = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize()); /* XSAVE state for guest_code */ - xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); + xstate = vm_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); + int iter = 0; for (;;) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -253,37 +285,47 @@ int main(int argc, char *argv[]) REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: - switch (uc.args[1]) { - case 1: - case 2: - case 3: - case 5: - case 6: - case 7: - case 8: - fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]); - break; - case 4: - case 10: - fprintf(stderr, - "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]); + ++iter; + if (uc.args[1] & TEST_SAVE_TILEDATA) { + fprintf(stderr, "GUEST_SYNC #%d, save tiledata\n", iter); + tile_state = vcpu_save_state(vcpu); + } + if (uc.args[1] & TEST_COMPARE_TILEDATA) { + fprintf(stderr, "GUEST_SYNC #%d, check TMM0 contents\n", iter); /* Compacted mode, get amx offset by xsave area * size subtract 8K amx size. */ - amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; - state = vcpu_save_state(vcpu); - void *amx_start = (void *)state->xsave + amx_offset; + u32 amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; + void *amx_start = (void *)tile_state->xsave + amx_offset; void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ ret = memcmp(amx_start, tiles_data, TILE_SIZE); TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); + } + if (uc.args[1] & TEST_RESTORE_TILEDATA) { + fprintf(stderr, "GUEST_SYNC #%d, before KVM_SET_XSAVE\n", iter); + vcpu_xsave_set(vcpu, tile_state->xsave); + fprintf(stderr, "GUEST_SYNC #%d, after KVM_SET_XSAVE\n", iter); + } + if (uc.args[1] & TEST_SAVE_RESTORE) { + fprintf(stderr, "GUEST_SYNC #%d, save/restore VM state\n", iter); + state = vcpu_save_state(vcpu); + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vcpu, ®s1); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); kvm_x86_state_cleanup(state); - break; - case 9: - fprintf(stderr, - "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]); - break; + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vcpu, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); } break; case UCALL_DONE: @@ -293,22 +335,6 @@ int main(int argc, char *argv[]) TEST_FAIL("Unknown ucall %lu", uc.cmd); } - state = vcpu_save_state(vcpu); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vcpu, ®s1); - - kvm_vm_release(vm); - - /* Restore state in a new VM. */ - vcpu = vm_recreate_with_one_vcpu(vm); - vcpu_load_state(vcpu, state); - kvm_x86_state_cleanup(state); - - memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vcpu, ®s2); - TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), - "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", - (ulong) regs2.rdi, (ulong) regs2.rsi); } done: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..c91660103137 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. + */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static u64 read_dev_msr(int msr_fd, u32 msr) +{ + u64 data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. + */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +static void guest_no_aperfmperf(void) +{ + u64 msr_val; + u8 vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + u64 host_aperf_before, host_mperf_before; + gva_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. + */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + u64 host_aperf_after, host_mperf_after; + u64 guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c index f8916bb34405..0c84c27ea584 100644 --- a/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c +++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c @@ -19,8 +19,8 @@ * timer frequency. */ static const struct { - const uint32_t tdcr; - const uint32_t divide_count; + const u32 tdcr; + const u32 divide_count; } tdcrs[] = { {0x0, 2}, {0x1, 4}, @@ -42,12 +42,12 @@ static void apic_enable(void) xapic_enable(); } -static uint32_t apic_read_reg(unsigned int reg) +static u32 apic_read_reg(unsigned int reg) { return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg); } -static void apic_write_reg(unsigned int reg, uint32_t val) +static void apic_write_reg(unsigned int reg, u32 val) { if (is_x2apic) x2apic_write_reg(reg, val); @@ -55,12 +55,12 @@ static void apic_write_reg(unsigned int reg, uint32_t val) xapic_write_reg(reg, val); } -static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms) +static void apic_guest_code(u64 apic_hz, u64 delay_ms) { - uint64_t tsc_hz = guest_tsc_khz * 1000; - const uint32_t tmict = ~0u; - uint64_t tsc0, tsc1, freq; - uint32_t tmcct; + u64 tsc_hz = guest_tsc_khz * 1000; + const u32 tmict = ~0u; + u64 tsc0, tsc1, freq; + u32 tmcct; int i; apic_enable(); @@ -121,7 +121,7 @@ static void test_apic_bus_clock(struct kvm_vcpu *vcpu) } } -static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, +static void run_apic_bus_clock_test(u64 apic_hz, u64 delay_ms, bool x2apic) { struct kvm_vcpu *vcpu; @@ -137,6 +137,10 @@ static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms, vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS, NSEC_PER_SEC / apic_hz); + TEST_ASSERT_EQ(kvm_check_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS), 1); + TEST_ASSERT_EQ(vm_check_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS), + NSEC_PER_SEC / apic_hz); + vcpu = vm_vcpu_add(vm, 0, apic_guest_code); vcpu_args_set(vcpu, 2, apic_hz, delay_ms); @@ -168,8 +172,8 @@ int main(int argc, char *argv[]) * Arbitrarilty default to 25MHz for the APIC bus frequency, which is * different enough from the default 1GHz to be interesting. */ - uint64_t apic_hz = 25 * 1000 * 1000; - uint64_t delay_ms = 100; + u64 apic_hz = 25 * 1000 * 1000; + u64 delay_ms = 100; int opt; TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS)); diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 7b3fda6842bc..ef0ddd240887 100644 --- a/tools/testing/selftests/kvm/x86/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -140,10 +140,10 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) } } -struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid) +struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, gva_t *p_gva, struct kvm_cpuid2 *cpuid) { int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]); - vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR); + gva_t gva = vm_alloc(vm, size, KVM_UTIL_MIN_VADDR); struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva); memcpy(guest_cpuids, cpuid, size); @@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct static void set_cpuid_after_run(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *ent; + struct kvm_sregs sregs; int rc; u32 eax, ebx, x; @@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu) rc = __vcpu_set_cpuid(vcpu); TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + /* + * Toggle CR4 bits that affect dynamic CPUID feature flags to verify + * setting unmodified CPUID succeeds with runtime CPUID updates. + */ + vcpu_sregs_get(vcpu, &sregs); + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 ^= X86_CR4_OSXSAVE; + if (kvm_cpu_has(X86_FEATURE_PKU)) + sregs.cr4 ^= X86_CR4_PKE; + vcpu_sregs_set(vcpu, &sregs); + + rc = __vcpu_set_cpuid(vcpu); + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + /* Changing CPU features is forbidden */ ent = vcpu_get_cpuid_entry(vcpu, 0x7); ebx = ent->ebx; @@ -202,7 +217,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) int main(void) { struct kvm_vcpu *vcpu; - vm_vaddr_t cpuid_gva; + gva_t cpuid_gva; struct kvm_vm *vm; int stage; diff --git a/tools/testing/selftests/kvm/x86/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c index 2d814c1d1dc4..0dfaf03cd0a0 100644 --- a/tools/testing/selftests/kvm/x86/debug_regs.c +++ b/tools/testing/selftests/kvm/x86/debug_regs.c @@ -16,7 +16,7 @@ #define IRQ_VECTOR 0xAA /* For testing data access debug BP */ -uint32_t guest_value; +u32 guest_value; extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start; @@ -86,7 +86,7 @@ int main(void) struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; - uint64_t cmd; + u64 cmd; int i; /* Instruction lengths starting at ss_start */ int ss_size[6] = { diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c index 2929c067c207..388ba4101f97 100644 --- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c @@ -23,7 +23,7 @@ #define SLOTS 2 #define ITERATIONS 2 -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static u64 guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB; @@ -33,17 +33,17 @@ static int iteration; static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; struct kvm_page_stats { - uint64_t pages_4k; - uint64_t pages_2m; - uint64_t pages_1g; - uint64_t hugepages; + u64 pages_4k; + u64 pages_2m; + u64 pages_1g; + u64 hugepages; }; static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) { - stats->pages_4k = vm_get_stat(vm, "pages_4k"); - stats->pages_2m = vm_get_stat(vm, "pages_2m"); - stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->pages_4k = vm_get_stat(vm, pages_4k); + stats->pages_2m = vm_get_stat(vm, pages_2m); + stats->pages_1g = vm_get_stat(vm, pages_1g); stats->hugepages = stats->pages_2m + stats->pages_1g; pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", @@ -89,9 +89,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) { struct kvm_vm *vm; unsigned long **bitmaps; - uint64_t guest_num_pages; - uint64_t host_num_pages; - uint64_t pages_per_slot; + u64 guest_num_pages; + u64 host_num_pages; + u64 pages_per_slot; int i; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; diff --git a/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c new file mode 100644 index 000000000000..5b3aef109cfc --- /dev/null +++ b/tools/testing/selftests/kvm/x86/evmcs_smm_controls_test.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2026, Red Hat, Inc. + * + * Test that vmx_leave_smm() validates vmcs12 controls before re-entering + * nested guest mode on RSM. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "smm.h" +#include "hyperv.h" +#include "vmx.h" + +#define SMRAM_GPA 0x1000000 +#define SMRAM_STAGE 0xfe + +#define SYNC_PORT 0xe + +#define STR(x) #x +#define XSTR(s) STR(s) + +/* + * SMI handler: runs in real-address mode. + * Reports SMRAM_STAGE via port IO, then does RSM. + */ +static u8 smi_handler[] = { + 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ + 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ + 0x0f, 0xaa, /* rsm */ +}; + +static inline void sync_with_host(u64 phase) +{ + asm volatile("in $" XSTR(SYNC_PORT) ", %%al \n" + : "+a" (phase)); +} + +static void l2_guest_code(void) +{ + sync_with_host(1); + + /* After SMI+RSM with invalid controls, we should not reach here. */ + vmcall(); +} + +static void guest_code(struct vmx_pages *vmx_pages, + struct hyperv_test_pages *hv_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Set up Hyper-V enlightenments and eVMCS */ + wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); + enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); + evmcs_enable(); + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_evmcs(hv_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + + /* L2 exits via vmcall if test fails */ + sync_with_host(2); +} + +int main(int argc, char *argv[]) +{ + gva_t vmx_pages_gva = 0, hv_pages_gva = 0; + struct hyperv_test_pages *hv; + struct hv_enlightened_vmcs *evmcs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct kvm_regs regs; + int stage_reported; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); + + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + vcpu_alloc_vmx(vm, &vmx_pages_gva); + hv = vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); + vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva); + + vcpu_run(vcpu); + + /* L2 is running and syncs with host. */ + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == 1, + "Expected stage 1, got %d", stage_reported); + + /* Inject SMI while L2 is running. */ + inject_smi(vcpu); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + vcpu_regs_get(vcpu, ®s); + stage_reported = regs.rax & 0xff; + TEST_ASSERT(stage_reported == SMRAM_STAGE, + "Expected SMM handler stage %#x, got %#x", + SMRAM_STAGE, stage_reported); + + /* + * Guest is now paused in the SMI handler, about to execute RSM. + * Hack the eVMCS page to set-up invalid pin-based execution + * control (PIN_BASED_VIRTUAL_NMIS without PIN_BASED_NMI_EXITING). + */ + evmcs = hv->enlightened_vmcs_hva; + evmcs->pin_based_vm_exec_control |= PIN_BASED_VIRTUAL_NMIS; + evmcs->hv_clean_fields = 0; + + /* + * Trigger copy_enlightened_to_vmcs12() via KVM_GET_NESTED_STATE, + * copying the invalid pin_based_vm_exec_control into cached_vmcs12. + */ + union { + struct kvm_nested_state state; + char state_[16384]; + } nested_state_buf; + + memset(&nested_state_buf, 0, sizeof(nested_state_buf)); + nested_state_buf.state.size = sizeof(nested_state_buf); + vcpu_nested_state_get(vcpu, &nested_state_buf.state); + + /* + * Resume the guest. The SMI handler executes RSM, which calls + * vmx_leave_smm(). nested_vmx_check_controls() should detect + * VIRTUAL_NMIS without NMI_EXITING and cause a triple fault. + */ + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c new file mode 100644 index 000000000000..c0d30ccd8767 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/fastops_test.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * Execute a fastop() instruction, with or without forced emulation. BT bit 0 + * to set RFLAGS.CF based on whether or not the input is even or odd, so that + * instructions like ADC and SBB are deterministic. + */ +#define fastop(__insn) \ + "bt $0, %[bt_val]\n\t" \ + __insn "\n\t" \ + "pushfq\n\t" \ + "pop %[flags]\n\t" + +#define flags_constraint(flags_val) [flags]"=r"(flags_val) +#define bt_constraint(__bt_val) [bt_val]"rm"((u32)__bt_val) + +#define guest_execute_fastop_1(FEP, insn, __val, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[val]") \ + : [val]"+r"(__val), flags_constraint(__flags) \ + : bt_constraint(__val) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_1(insn, type_t, __val) \ +({ \ + type_t val = __val, ex_val = __val, input = __val; \ + u64 flags, ex_flags; \ + \ + guest_execute_fastop_1("", insn, ex_val, ex_flags); \ + guest_execute_fastop_1(KVM_FEP, insn, val, flags); \ + \ + __GUEST_ASSERT(val == ex_val, \ + "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \ + (u64)ex_val, insn, (u64)input, (u64)val); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \ + ex_flags, insn, (u64)input, flags); \ +}) + +#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %[input], %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : [input]"r"(__input), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_2(insn, type_t, __val1, __val2) \ +({ \ + type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \ + u64 flags, ex_flags; \ + \ + guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \ + guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \ + (u64)ex_output, insn, (u64)input, \ + (u64)input2, (u64)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \ + ex_flags, insn, (u64)input, (u64)input2, flags); \ +}) + +#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \ +({ \ + __asm__ __volatile__(fastop(FEP insn " %%cl, %[output]") \ + : [output]"+r"(__output), flags_constraint(__flags) \ + : "c"(__shift), bt_constraint(__output) \ + : "cc", "memory"); \ +}) + +#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \ +({ \ + type_t output = __val2, ex_output = __val2, input = __val2; \ + u8 shift = __val1; \ + u64 flags, ex_flags; \ + \ + guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \ + guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \ + \ + __GUEST_ASSERT(output == ex_output, \ + "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + (u64)ex_output, insn, shift, (u64)input, \ + (u64)output); \ + __GUEST_ASSERT(flags == ex_flags, \ + "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \ + ex_flags, insn, shift, (u64)input, flags); \ +}) + +#define guest_execute_fastop_div(__KVM_ASM_SAFE, insn, __a, __d, __rm, __flags) \ +({ \ + u64 ign_error_code; \ + u8 vector; \ + \ + __asm__ __volatile__(fastop(__KVM_ASM_SAFE(insn " %[denom]")) \ + : "+a"(__a), "+d"(__d), flags_constraint(__flags), \ + KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \ + : [denom]"rm"(__rm), bt_constraint(__rm) \ + : "cc", "memory", KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +#define guest_test_fastop_div(insn, type_t, __val1, __val2) \ +({ \ + type_t _a = __val1, _d = __val1, rm = __val2; \ + type_t a = _a, d = _d, ex_a = _a, ex_d = _d; \ + u64 flags, ex_flags; \ + u8 v, ex_v; \ + \ + ex_v = guest_execute_fastop_div(KVM_ASM_SAFE, insn, ex_a, ex_d, rm, ex_flags); \ + v = guest_execute_fastop_div(KVM_ASM_SAFE_FEP, insn, a, d, rm, flags); \ + \ + GUEST_ASSERT_EQ(v, ex_v); \ + __GUEST_ASSERT(v == ex_v, \ + "Wanted vector 0x%x for '%s 0x%lx:0x%lx/0x%lx', got 0x%x", \ + ex_v, insn, (u64)_a, (u64)_d, (u64)rm, v); \ + __GUEST_ASSERT(a == ex_a && d == ex_d, \ + "Wanted 0x%lx:0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx:0x%lx",\ + (u64)ex_a, (u64)ex_d, insn, (u64)_a, \ + (u64)_d, (u64)rm, (u64)a, (u64)d); \ + __GUEST_ASSERT(v || ex_v || (flags == ex_flags), \ + "Wanted flags 0x%lx for '%s 0x%lx:0x%lx/0x%lx', got 0x%lx", \ + ex_flags, insn, (u64)_a, (u64)_d, (u64)rm, flags);\ +}) + +static const u64 vals[] = { + 0, + 1, + 2, + 4, + 7, + 0x5555555555555555, + 0xaaaaaaaaaaaaaaaa, + 0xfefefefefefefefe, + 0xffffffffffffffff, +}; + +#define guest_test_fastops(type_t, suffix) \ +do { \ + int i, j; \ + \ + for (i = 0; i < ARRAY_SIZE(vals); i++) { \ + guest_test_fastop_1("dec" suffix, type_t, vals[i]); \ + guest_test_fastop_1("inc" suffix, type_t, vals[i]); \ + guest_test_fastop_1("neg" suffix, type_t, vals[i]); \ + guest_test_fastop_1("not" suffix, type_t, vals[i]); \ + \ + for (j = 0; j < ARRAY_SIZE(vals); j++) { \ + guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \ +if (sizeof(type_t) != 1) { \ + guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \ +} \ + guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \ + guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \ + \ + guest_test_fastop_div("div" suffix, type_t, vals[i], vals[j]); \ + } \ + } \ +} while (0) + +static void guest_code(void) +{ + guest_test_fastops(u8, "b"); + guest_test_fastops(u16, "w"); + guest_test_fastops(u32, "l"); + guest_test_fastops(u64, "q"); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c index a72f13ae2edb..158550701771 100644 --- a/tools/testing/selftests/kvm/x86/feature_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c @@ -12,7 +12,7 @@ #include "kvm_util.h" #include "processor.h" -static bool is_kvm_controlled_msr(uint32_t msr) +static bool is_kvm_controlled_msr(u32 msr) { return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; } @@ -21,7 +21,7 @@ static bool is_kvm_controlled_msr(uint32_t msr) * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" * MSR, and doesn't allow setting the hidden version. */ -static bool is_hidden_vmx_msr(uint32_t msr) +static bool is_hidden_vmx_msr(u32 msr) { switch (msr) { case MSR_IA32_VMX_PINBASED_CTLS: @@ -34,15 +34,15 @@ static bool is_hidden_vmx_msr(uint32_t msr) } } -static bool is_quirked_msr(uint32_t msr) +static bool is_quirked_msr(u32 msr) { return msr != MSR_AMD64_DE_CFG; } -static void test_feature_msr(uint32_t msr) +static void test_feature_msr(u32 msr) { - const uint64_t supported_mask = kvm_get_feature_msr(msr); - uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + const u64 supported_mask = kvm_get_feature_msr(msr); + u64 reset_value = is_quirked_msr(msr) ? supported_mask : 0; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c index 762628f7d4ba..753a0e730ea8 100644 --- a/tools/testing/selftests/kvm/x86/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c @@ -26,18 +26,18 @@ static void guest_ud_handler(struct ex_regs *regs) regs->rip += HYPERCALL_INSN_SIZE; } -static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; -static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; +static const u8 vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 }; +static const u8 svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 }; -extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE]; -static uint64_t do_sched_yield(uint8_t apic_id) +extern u8 hypercall_insn[HYPERCALL_INSN_SIZE]; +static u64 do_sched_yield(u8 apic_id) { - uint64_t ret; + u64 ret; asm volatile("hypercall_insn:\n\t" ".byte 0xcc,0xcc,0xcc\n\t" : "=a"(ret) - : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id) + : "a"((u64)KVM_HC_SCHED_YIELD), "b"((u64)apic_id) : "memory"); return ret; @@ -45,14 +45,14 @@ static uint64_t do_sched_yield(uint8_t apic_id) static void guest_main(void) { - const uint8_t *native_hypercall_insn; - const uint8_t *other_hypercall_insn; - uint64_t ret; + const u8 *native_hypercall_insn; + const u8 *other_hypercall_insn; + u64 ret; if (host_cpu_is_intel) { native_hypercall_insn = vmx_vmcall; other_hypercall_insn = svm_vmmcall; - } else if (host_cpu_is_amd) { + } else if (host_cpu_is_amd_compatible) { native_hypercall_insn = svm_vmmcall; other_hypercall_insn = vmx_vmcall; } else { @@ -72,7 +72,7 @@ static void guest_main(void) * the "right" hypercall. */ if (quirk_disabled) { - GUEST_ASSERT(ret == (uint64_t)-EFAULT); + GUEST_ASSERT(ret == (u64)-EFAULT); GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE)); } else { diff --git a/tools/testing/selftests/kvm/x86/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h index 37b1a9f52864..fd6b6c67199a 100644 --- a/tools/testing/selftests/kvm/x86/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86/flds_emulation.h @@ -12,7 +12,7 @@ * KVM to emulate the instruction (e.g. by providing an MMIO address) to * exercise emulation failures. */ -static inline void flds(uint64_t address) +static inline void flds(u64 address) { __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address)); } @@ -21,8 +21,8 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_regs regs; - uint8_t *insn_bytes; - uint64_t flags; + u8 *insn_bytes; + u64 flags; TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); diff --git a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c index 10b1b0ba374e..8e20a03b3329 100644 --- a/tools/testing/selftests/kvm/x86/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c @@ -10,11 +10,11 @@ void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) { - const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); - const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); - const uint64_t legal = ignored | valid; - uint64_t val = BIT_ULL(bit); - uint64_t actual; + const u64 ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); + const u64 valid = BIT_ULL(18) | BIT_ULL(24); + const u64 legal = ignored | valid; + u64 val = BIT_ULL(bit); + u64 actual; int r; r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); diff --git a/tools/testing/selftests/kvm/x86/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c index e058bc676cd6..c083cea546dc 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c @@ -98,7 +98,7 @@ static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page) GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000); } -static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa) +static void guest_main(struct ms_hyperv_tsc_page *tsc_page, gpa_t tsc_page_gpa) { u64 tsc_scale, tsc_offset; @@ -208,7 +208,7 @@ int main(void) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - vm_vaddr_t tsc_page_gva; + gva_t tsc_page_gva; int stage; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); @@ -218,7 +218,7 @@ int main(void) vcpu_set_hv_cpuid(vcpu); - tsc_page_gva = vm_vaddr_alloc_page(vm); + tsc_page_gva = vm_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned"); diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4e920705681a..3c21af811d8f 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -22,25 +22,6 @@ static void guest_code(void) { } -static bool smt_possible(void) -{ - char buf[16]; - FILE *f; - bool res = true; - - f = fopen("/sys/devices/system/cpu/smt/control", "r"); - if (f) { - if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { - if (!strncmp(buf, "forceoff", 8) || - !strncmp(buf, "notsupported", 12)) - res = false; - } - fclose(f); - } - - return res; -} - static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; @@ -64,7 +45,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), - "function %x is our of supported range", + "function %x is out of supported range", entry->function); TEST_ASSERT(entry->index == 0, @@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) case 0x40000004: test_val = entry->eax & (1UL << 18); - TEST_ASSERT(!!test_val == !smt_possible(), + TEST_ASSERT(!!test_val == !is_smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); diff --git a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c index 74cf19661309..c7fa114aee20 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c @@ -30,7 +30,7 @@ static void guest_nmi_handler(struct ex_regs *regs) { } -static inline void rdmsr_from_l2(uint32_t msr) +static inline void rdmsr_from_l2(u32 msr) { /* Currently, L1 doesn't preserve GPRs during vmexits. */ __asm__ __volatile__ ("rdmsr" : : "c"(msr) : @@ -76,7 +76,7 @@ void l2_guest_code(void) } void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, - vm_vaddr_t hv_hcall_page_gpa) + gpa_t hv_hcall_page_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -231,8 +231,8 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; + gva_t vmx_pages_gva = 0, hv_pages_gva = 0; + gva_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -246,7 +246,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); - hcall_page = vm_vaddr_alloc_pages(vm, 1); + hcall_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); vcpu_set_hv_cpuid(vcpu); diff --git a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c index 949e08e98f31..ae047db7b1be 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c @@ -15,19 +15,19 @@ /* Any value is fine */ #define EXT_CAPABILITIES 0xbull -static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, - vm_vaddr_t out_pg_gva) +static void guest_code(gpa_t in_pg_gpa, gpa_t out_pg_gpa, + gva_t out_pg_gva) { - uint64_t *output_gva; + u64 *output_gva; wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa); - output_gva = (uint64_t *)out_pg_gva; + output_gva = (u64 *)out_pg_gva; hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa); - /* TLFS states output will be a uint64_t value */ + /* TLFS states output will be a u64 value */ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES); GUEST_DONE(); @@ -35,12 +35,12 @@ static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa, int main(void) { - vm_vaddr_t hcall_out_page; - vm_vaddr_t hcall_in_page; + gva_t hcall_out_page; + gva_t hcall_in_page; struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; - uint64_t *outval; + u64 *outval; struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); @@ -57,11 +57,11 @@ int main(void) vcpu_set_hv_cpuid(vcpu); /* Hypercall input */ - hcall_in_page = vm_vaddr_alloc_pages(vm, 1); + hcall_in_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size); /* Hypercall output */ - hcall_out_page = vm_vaddr_alloc_pages(vm, 1); + hcall_out_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size); vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page), diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 068e9c69710d..7347f1fe5157 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -22,27 +22,27 @@ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0) struct msr_data { - uint32_t idx; + u32 idx; bool fault_expected; bool write; u64 write_val; }; struct hcall_data { - uint64_t control; - uint64_t expect; + u64 control; + u64 expect; bool ud_expected; }; -static bool is_write_only_msr(uint32_t msr) +static bool is_write_only_msr(u32 msr) { return msr == HV_X64_MSR_EOI; } static void guest_msr(struct msr_data *msr) { - uint8_t vector = 0; - uint64_t msr_val = 0; + u8 vector = 0; + u64 msr_val = 0; GUEST_ASSERT(msr->idx); @@ -54,12 +54,12 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected #GP on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); else __GUEST_ASSERT(!vector, - "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->write ? "WR" : "RD", msr->idx, vector); + "Expected success on %sMSR(0x%x), got %s", + msr->write ? "WR" : "RD", msr->idx, ex_str(vector)); if (vector || is_write_only_msr(msr->idx)) goto done; @@ -82,10 +82,10 @@ done: GUEST_DONE(); } -static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) +static void guest_hcall(gpa_t pgs_gpa, struct hcall_data *hcall) { u64 res, input, output; - uint8_t vector; + u8 vector; GUEST_ASSERT_NE(hcall->control, 0); @@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { input = pgs_gpa; - output = pgs_gpa + 4096; + output = pgs_gpa + PAGE_SIZE; } else { input = output = 0; } @@ -102,12 +102,12 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected #UD for control '%lu', got %s", + hcall->control, ex_str(vector)); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%lu', got vector '0x%x'", - hcall->control, vector); + "Expected no exception for control '%lu', got %s", + hcall->control, ex_str(vector)); GUEST_ASSERT_EQ(res, hcall->expect); } @@ -134,14 +134,14 @@ static void guest_test_msrs_access(void) struct kvm_vm *vm; struct ucall uc; int stage = 0; - vm_vaddr_t msr_gva; + gva_t msr_gva; struct msr_data *msr; bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC); while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_msr); - msr_gva = vm_vaddr_alloc_page(vm); + msr_gva = vm_alloc_page(vm); memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); msr = addr_gva2hva(vm, msr_gva); @@ -523,17 +523,17 @@ static void guest_test_hcalls_access(void) struct kvm_vm *vm; struct ucall uc; int stage = 0; - vm_vaddr_t hcall_page, hcall_params; + gva_t hcall_page, hcall_params; struct hcall_data *hcall; while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall_page = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - hcall_params = vm_vaddr_alloc_page(vm); + hcall_params = vm_alloc_page(vm); memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); hcall = addr_gva2hva(vm, hcall_params); diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 22c0c124582f..771535f9aad3 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -18,7 +18,7 @@ #define IPI_VECTOR 0xfe -static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; +static volatile u64 ipis_rcvd[RECEIVER_VCPU_ID_2 + 1]; struct hv_vpset { u64 format; @@ -45,13 +45,13 @@ struct hv_send_ipi_ex { struct hv_vpset vp_set; }; -static inline void hv_init(vm_vaddr_t pgs_gpa) +static inline void hv_init(gpa_t pgs_gpa) { wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); } -static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) +static void receiver_code(void *hcall_page, gpa_t pgs_gpa) { u32 vcpu_id; @@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* Signal sender vCPU we're ready */ ipis_rcvd[vcpu_id] = (u64)-1; - for (;;) - asm volatile("sti; hlt; cli"); + for (;;) { + safe_halt(); + cli(); + } } static void guest_ipi_handler(struct ex_regs *regs) @@ -83,7 +85,7 @@ static inline void nop_loop(void) asm volatile("nop"); } -static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) +static void sender_guest_code(void *hcall_page, gpa_t pgs_gpa) { struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page; struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page; @@ -100,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ ipi->vector = IPI_VECTOR; ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -114,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 0; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -136,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -158,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -181,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -241,7 +243,7 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_vcpu *vcpu[3]; - vm_vaddr_t hcall_page; + gva_t hcall_page; pthread_t threads[2]; int stage = 1, r; struct ucall uc; @@ -251,7 +253,7 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall_page = vm_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c index 0ddb63229bcb..7a62f6a9d606 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c @@ -21,7 +21,7 @@ #define L2_GUEST_STACK_SIZE 256 /* Exit to L1 from L2 with RDMSR instruction */ -static inline void rdmsr_from_l2(uint32_t msr) +static inline void rdmsr_from_l2(u32 msr) { /* Currently, L1 doesn't preserve GPRs during vmexits. */ __asm__ __volatile__ ("rdmsr" : : "c"(msr) : @@ -67,7 +67,7 @@ void l2_guest_code(void) static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, struct hyperv_test_pages *hv_pages, - vm_vaddr_t pgs_gpa) + gpa_t pgs_gpa) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; struct vmcb *vmcb = svm->vmcb; @@ -149,8 +149,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm, int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0, hv_pages_gva = 0; - vm_vaddr_t hcall_page; + gva_t nested_gva = 0, hv_pages_gva = 0; + gva_t hcall_page; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) vcpu_alloc_svm(vm, &nested_gva); vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); - hcall_page = vm_vaddr_alloc_pages(vm, 1); + hcall_page = vm_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040..15ee8b7bfc11 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -61,14 +61,14 @@ struct hv_tlb_flush_ex { * - GVAs of the test pages' PTEs */ struct test_data { - vm_vaddr_t hcall_gva; - vm_paddr_t hcall_gpa; - vm_vaddr_t test_pages; - vm_vaddr_t test_pages_pte[NTEST_PAGES]; + gva_t hcall_gva; + gpa_t hcall_gpa; + gva_t test_pages; + gva_t test_pages_pte[NTEST_PAGES]; }; /* 'Worker' vCPU code checking the contents of the test page */ -static void worker_guest_code(vm_vaddr_t test_data) +static void worker_guest_code(gva_t test_data) { struct test_data *data = (struct test_data *)test_data; u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX); @@ -133,12 +133,12 @@ static void set_expected_val(void *addr, u64 val, int vcpu_id) * Update PTEs swapping two test pages. * TODO: use swap()/xchg() when these are provided. */ -static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2) +static void swap_two_test_pages(gpa_t pte_gva1, gpa_t pte_gva2) { - uint64_t tmp = *(uint64_t *)pte_gva1; + u64 tmp = *(u64 *)pte_gva1; - *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2; - *(uint64_t *)pte_gva2 = tmp; + *(u64 *)pte_gva1 = *(u64 *)pte_gva2; + *(u64 *)pte_gva2 = tmp; } /* @@ -196,12 +196,12 @@ static inline void post_test(struct test_data *data, u64 exp1, u64 exp2) #define TESTVAL2 0x0202020202020202 /* Main vCPU doing the test */ -static void sender_guest_code(vm_vaddr_t test_data) +static void sender_guest_code(gva_t test_data) { struct test_data *data = (struct test_data *)test_data; struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva; struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva; - vm_paddr_t hcall_gpa = data->hcall_gpa; + gpa_t hcall_gpa = data->hcall_gpa; int i, stage = 1; wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); @@ -581,9 +581,9 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_vcpu *vcpu[3]; pthread_t threads[2]; - vm_vaddr_t test_data_page, gva; - vm_paddr_t gpa; - uint64_t *pte; + gva_t test_data_page, gva; + gpa_t gpa; + u64 *pte; struct test_data *data; struct ucall uc; int stage = 1, r, i; @@ -593,11 +593,11 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Test data page */ - test_data_page = vm_vaddr_alloc_page(vm); + test_data_page = vm_alloc_page(vm); data = (struct test_data *)addr_gva2hva(vm, test_data_page); /* Hypercall input/output */ - data->hcall_gva = vm_vaddr_alloc_pages(vm, 2); + data->hcall_gva = vm_alloc_pages(vm, 2); data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva); memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE); @@ -606,7 +606,7 @@ int main(int argc, char *argv[]) * and the test will swap their mappings. The third page keeps the indication * about the current state of mappings. */ - data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1); + data->test_pages = vm_alloc_pages(vm, NTEST_PAGES + 1); for (i = 0; i < NTEST_PAGES; i++) memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i), (u8)(i + 1), PAGE_SIZE); @@ -617,11 +617,11 @@ int main(int argc, char *argv[]) * Get PTE pointers for test pages and map them inside the guest. * Use separate page for each PTE for simplicity. */ - gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); + gva = vm_unused_gva_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR); for (i = 0; i < NTEST_PAGES; i++) { - pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); + pte = vm_get_pte(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); } diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c new file mode 100644 index 000000000000..52014a3210c8 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + */ +#include <linux/atomic.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" +#include "test_util.h" + +#define NR_BUS_LOCKS_PER_LEVEL 100 +#define CACHE_LINE_SIZE 64 + +/* + * To generate a bus lock, carve out a buffer that precisely occupies two cache + * lines and perform an atomic access that splits the two lines. + */ +static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE); +static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)]; + +static void guest_generate_buslocks(void) +{ + for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++) + atomic_inc(val); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_generate_buslocks(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *test_data) +{ + guest_generate_buslocks(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + gva_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + int i, bus_locks = 0; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT)); + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + run = vcpu->run; + + for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) { + struct ucall uc; + + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto done; + case UCALL_SYNC: + continue; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK); + + /* + * Verify the counter is actually getting incremented, e.g. that + * KVM isn't skipping the instruction. On Intel, the exit is + * trap-like, i.e. the counter should already have been + * incremented. On AMD, it's fault-like, i.e. the counter will + * be incremented when the guest re-executes the instruction. + */ + sync_global_from_guest(vm, *val); + TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel); + + bus_locks++; + } + TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks); +done: + TEST_ASSERT_EQ(i, bus_locks); + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c index 5bc12222d87a..5ad4aeb8e373 100644 --- a/tools/testing/selftests/kvm/x86/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c @@ -17,8 +17,8 @@ #include "processor.h" struct test_case { - uint64_t kvmclock_base; - int64_t realtime_offset; + u64 kvmclock_base; + s64 realtime_offset; }; static struct test_case test_cases[] = { @@ -31,7 +31,7 @@ static struct test_case test_cases[] = { #define GUEST_SYNC_CLOCK(__stage, __val) \ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0) -static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti) +static void guest_main(gpa_t pvti_pa, struct pvclock_vcpu_time_info *pvti) { int i; @@ -52,7 +52,7 @@ static inline void assert_flags(struct kvm_clock_data *data) static void handle_sync(struct ucall *uc, struct kvm_clock_data *start, struct kvm_clock_data *end) { - uint64_t obs, exp_lo, exp_hi; + u64 obs, exp_lo, exp_hi; obs = uc->args[2]; exp_lo = start->clock; @@ -135,8 +135,8 @@ static void enter_guest(struct kvm_vcpu *vcpu) int main(void) { struct kvm_vcpu *vcpu; - vm_vaddr_t pvti_gva; - vm_paddr_t pvti_gpa; + gva_t pvti_gva; + gpa_t pvti_gpa; struct kvm_vm *vm; int flags; @@ -147,7 +147,7 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_main); - pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000); + pvti_gva = vm_alloc(vm, getpagesize(), 0x10000); pvti_gpa = addr_gva2gpa(vm, pvti_gva); vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva); diff --git a/tools/testing/selftests/kvm/x86/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c index 1b805cbdb47b..8ed5fa635021 100644 --- a/tools/testing/selftests/kvm/x86/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c @@ -13,7 +13,7 @@ #include "processor.h" struct msr_data { - uint32_t idx; + u32 idx; const char *name; }; @@ -40,8 +40,8 @@ static struct msr_data msrs_to_test[] = { static void test_msr(struct msr_data *msr) { - uint64_t ignored; - uint8_t vector; + u64 ignored; + u8 vector; PR_MSR(msr); @@ -53,7 +53,7 @@ static void test_msr(struct msr_data *msr) } struct hcall_data { - uint64_t nr; + u64 nr; const char *name; }; @@ -73,7 +73,7 @@ static struct hcall_data hcalls_to_test[] = { static void test_hcall(struct hcall_data *hc) { - uint64_t r; + u64 r; PR_HCALL(hc); r = kvm_hypercall(hc->nr, 0, 0, 0, 0); diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..9c156cf7db0e 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -27,19 +30,27 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected #UD on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ - testcase, vector); \ + "Expected success on " insn " for testcase '0x%x', got %s", \ + testcase, ex_str(vector)); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,80 +61,76 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } int main(int argc, char *argv[]) { - uint64_t disabled_quirks; + u64 disabled_quirks; struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/kvm/x86/msrs_test.c b/tools/testing/selftests/kvm/x86/msrs_test.c new file mode 100644 index 000000000000..f7e39bf887ad --- /dev/null +++ b/tools/testing/selftests/kvm/x86/msrs_test.c @@ -0,0 +1,489 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <asm/msr-index.h> + +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" + +/* Use HYPERVISOR for MSRs that are emulated unconditionally (as is HYPERVISOR). */ +#define X86_FEATURE_NONE X86_FEATURE_HYPERVISOR + +struct kvm_msr { + const struct kvm_x86_cpu_feature feature; + const struct kvm_x86_cpu_feature feature2; + const char *name; + const u64 reset_val; + const u64 write_val; + const u64 rsvd_val; + const u32 index; + const bool is_kvm_defined; +}; + +#define ____MSR_TEST(msr, str, val, rsvd, reset, feat, f2, is_kvm) \ +{ \ + .index = msr, \ + .name = str, \ + .write_val = val, \ + .rsvd_val = rsvd, \ + .reset_val = reset, \ + .feature = X86_FEATURE_ ##feat, \ + .feature2 = X86_FEATURE_ ##f2, \ + .is_kvm_defined = is_kvm, \ +} + +#define __MSR_TEST(msr, str, val, rsvd, reset, feat) \ + ____MSR_TEST(msr, str, val, rsvd, reset, feat, feat, false) + +#define MSR_TEST_NON_ZERO(msr, val, rsvd, reset, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, reset, feat) + +#define MSR_TEST(msr, val, rsvd, feat) \ + __MSR_TEST(msr, #msr, val, rsvd, 0, feat) + +#define MSR_TEST2(msr, val, rsvd, feat, f2) \ + ____MSR_TEST(msr, #msr, val, rsvd, 0, feat, f2, false) + +/* + * Note, use a page aligned value for the canonical value so that the value + * is compatible with MSRs that use bits 11:0 for things other than addresses. + */ +static const u64 canonical_val = 0x123456789000ull; + +/* + * Arbitrary value with bits set in every byte, but not all bits set. This is + * also a non-canonical value, but that's coincidental (any 64-bit value with + * an alternating 0s/1s pattern will be non-canonical). + */ +static const u64 u64_val = 0xaaaa5555aaaa5555ull; + +#define MSR_TEST_CANONICAL(msr, feat) \ + __MSR_TEST(msr, #msr, canonical_val, NONCANONICAL, 0, feat) + +#define MSR_TEST_KVM(msr, val, rsvd, feat) \ + ____MSR_TEST(KVM_REG_ ##msr, #msr, val, rsvd, 0, feat, feat, true) + +/* + * The main struct must be scoped to a function due to the use of structures to + * define features. For the global structure, allocate enough space for the + * foreseeable future without getting too ridiculous, to minimize maintenance + * costs (bumping the array size every time an MSR is added is really annoying). + */ +static struct kvm_msr msrs[128]; +static int idx; + +static bool ignore_unsupported_msrs; + +static u64 fixup_rdmsr_val(u32 msr, u64 want) +{ + /* + * AMD CPUs drop bits 63:32 on some MSRs that Intel CPUs support. KVM + * is supposed to emulate that behavior based on guest vendor model + * (which is the same as the host vendor model for this test). + */ + if (!host_cpu_is_amd_compatible) + return want; + + switch (msr) { + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + case MSR_TSC_AUX: + return want & GENMASK_ULL(31, 0); + default: + return want; + } +} + +static void __rdmsr(u32 msr, u64 want) +{ + u64 val; + u8 vec; + + vec = rdmsr_safe(msr, &val); + __GUEST_ASSERT(!vec, "Unexpected %s on RDMSR(0x%x)", ex_str(vec), msr); + + __GUEST_ASSERT(val == want, "Wanted 0x%lx from RDMSR(0x%x), got 0x%lx", + want, msr, val); +} + +static void __wrmsr(u32 msr, u64 val) +{ + u8 vec; + + vec = wrmsr_safe(msr, val); + __GUEST_ASSERT(!vec, "Unexpected %s on WRMSR(0x%x, 0x%lx)", + ex_str(vec), msr, val); + __rdmsr(msr, fixup_rdmsr_val(msr, val)); +} + +static void guest_test_supported_msr(const struct kvm_msr *msr) +{ + __rdmsr(msr->index, msr->reset_val); + __wrmsr(msr->index, msr->write_val); + GUEST_SYNC(fixup_rdmsr_val(msr->index, msr->write_val)); + + __rdmsr(msr->index, msr->reset_val); +} + +static void guest_test_unsupported_msr(const struct kvm_msr *msr) +{ + u64 val; + u8 vec; + + /* + * KVM's ABI with respect to ignore_msrs is a mess and largely beyond + * repair, just skip the unsupported MSR tests. + */ + if (ignore_unsupported_msrs) + goto skip_wrmsr_gp; + + /* + * {S,U}_CET exist if IBT or SHSTK is supported, but with bits that are + * writable only if their associated feature is supported. Skip the + * RDMSR #GP test if the secondary feature is supported, but perform + * the WRMSR #GP test as the to-be-written value is tied to the primary + * feature. For all other MSRs, simply do nothing. + */ + if (this_cpu_has(msr->feature2)) { + if (msr->index != MSR_IA32_U_CET && + msr->index != MSR_IA32_S_CET) + goto skip_wrmsr_gp; + + goto skip_rdmsr_gp; + } + + vec = rdmsr_safe(msr->index, &val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on RDMSR(0x%x), got %s", + msr->index, ex_str(vec)); + +skip_rdmsr_gp: + vec = wrmsr_safe(msr->index, msr->write_val); + __GUEST_ASSERT(vec == GP_VECTOR, "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->write_val, ex_str(vec)); + +skip_wrmsr_gp: + GUEST_SYNC(0); +} + +void guest_test_reserved_val(const struct kvm_msr *msr) +{ + /* Skip reserved value checks as well, ignore_msrs is trully a mess. */ + if (ignore_unsupported_msrs) + return; + + /* + * If the CPU will truncate the written value (e.g. SYSENTER on AMD), + * expect success and a truncated value, not #GP. + */ + if ((!this_cpu_has(msr->feature) && !this_cpu_has(msr->feature2)) || + msr->rsvd_val == fixup_rdmsr_val(msr->index, msr->rsvd_val)) { + u8 vec = wrmsr_safe(msr->index, msr->rsvd_val); + + __GUEST_ASSERT(vec == GP_VECTOR, + "Wanted #GP on WRMSR(0x%x, 0x%lx), got %s", + msr->index, msr->rsvd_val, ex_str(vec)); + } else { + __wrmsr(msr->index, msr->rsvd_val); + __wrmsr(msr->index, msr->reset_val); + } +} + +static void guest_main(void) +{ + for (;;) { + const struct kvm_msr *msr = &msrs[READ_ONCE(idx)]; + + if (this_cpu_has(msr->feature)) + guest_test_supported_msr(msr); + else + guest_test_unsupported_msr(msr); + + if (msr->rsvd_val) + guest_test_reserved_val(msr); + + GUEST_SYNC(msr->reset_val); + } +} + +static bool has_one_reg; +static bool use_one_reg; + +#define KVM_X86_MAX_NR_REGS 1 + +static bool vcpu_has_reg(struct kvm_vcpu *vcpu, u64 reg) +{ + struct { + struct kvm_reg_list list; + u64 regs[KVM_X86_MAX_NR_REGS]; + } regs = {}; + int r, i; + + /* + * If KVM_GET_REG_LIST succeeds with n=0, i.e. there are no supported + * regs, then the vCPU obviously doesn't support the reg. + */ + r = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + if (!r) + return false; + + TEST_ASSERT_EQ(errno, E2BIG); + + /* + * KVM x86 is expected to support enumerating a relative small number + * of regs. The majority of registers supported by KVM_{G,S}ET_ONE_REG + * are enumerated via other ioctls, e.g. KVM_GET_MSR_INDEX_LIST. For + * simplicity, hardcode the maximum number of regs and manually update + * the test as necessary. + */ + TEST_ASSERT(regs.list.n <= KVM_X86_MAX_NR_REGS, + "KVM reports %llu regs, test expects at most %u regs, stale test?", + regs.list.n, KVM_X86_MAX_NR_REGS); + + vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®s.list); + for (i = 0; i < regs.list.n; i++) { + if (regs.regs[i] == reg) + return true; + } + + return false; +} + +static void host_test_kvm_reg(struct kvm_vcpu *vcpu) +{ + bool has_reg = vcpu_cpuid_has(vcpu, msrs[idx].feature); + u64 reset_val = msrs[idx].reset_val; + u64 write_val = msrs[idx].write_val; + u64 rsvd_val = msrs[idx].rsvd_val; + u32 reg = msrs[idx].index; + u64 val; + int r; + + if (!use_one_reg) + return; + + TEST_ASSERT_EQ(vcpu_has_reg(vcpu, KVM_X86_REG_KVM(reg)), has_reg); + + if (!has_reg) { + r = __vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg), &val); + TEST_ASSERT(r && errno == EINVAL, + "Expected failure on get_reg(0x%x)", reg); + rsvd_val = 0; + goto out; + } + + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, reg, val); + + vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), write_val); + val = vcpu_get_reg(vcpu, KVM_X86_REG_KVM(reg)); + TEST_ASSERT(val == write_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + write_val, reg, val); + +out: + r = __vcpu_set_reg(vcpu, KVM_X86_REG_KVM(reg), rsvd_val); + TEST_ASSERT(r, "Expected failure on set_reg(0x%x, 0x%lx)", reg, rsvd_val); +} + +static void host_test_msr(struct kvm_vcpu *vcpu, u64 guest_val) +{ + u64 reset_val = msrs[idx].reset_val; + u32 msr = msrs[idx].index; + u64 val; + + if (!kvm_cpu_has(msrs[idx].feature)) + return; + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == guest_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + guest_val, msr, val); + + if (use_one_reg) + vcpu_set_reg(vcpu, KVM_X86_REG_MSR(msr), reset_val); + else + vcpu_set_msr(vcpu, msr, reset_val); + + val = vcpu_get_msr(vcpu, msr); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_msr(0x%x), got 0x%lx", + reset_val, msr, val); + + if (!has_one_reg) + return; + + val = vcpu_get_reg(vcpu, KVM_X86_REG_MSR(msr)); + TEST_ASSERT(val == reset_val, "Wanted 0x%lx from get_reg(0x%x), got 0x%lx", + reset_val, msr, val); +} + +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + for (;;) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + host_test_msr(vcpu, uc.args[1]); + return; + case UCALL_PRINTF: + pr_info("%s", uc.buffer); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_DONE: + TEST_FAIL("Unexpected UCALL_DONE"); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +static void vcpus_run(struct kvm_vcpu **vcpus, const int NR_VCPUS) +{ + int i; + + for (i = 0; i < NR_VCPUS; i++) + do_vcpu_run(vcpus[i]); +} + +#define MISC_ENABLES_RESET_VAL (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | MSR_IA32_MISC_ENABLE_BTS_UNAVAIL) + +static void test_msrs(void) +{ + const struct kvm_msr __msrs[] = { + MSR_TEST_NON_ZERO(MSR_IA32_MISC_ENABLE, + MISC_ENABLES_RESET_VAL | MSR_IA32_MISC_ENABLE_FAST_STRING, + MSR_IA32_MISC_ENABLE_FAST_STRING, MISC_ENABLES_RESET_VAL, NONE), + MSR_TEST_NON_ZERO(MSR_IA32_CR_PAT, 0x07070707, 0, 0x7040600070406, NONE), + + /* + * TSC_AUX is supported if RDTSCP *or* RDPID is supported. Add + * entries for each features so that TSC_AUX doesn't exists for + * the "unsupported" vCPU, and obviously to test both cases. + */ + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDTSCP, RDPID), + MSR_TEST2(MSR_TSC_AUX, 0x12345678, u64_val, RDPID, RDTSCP), + + MSR_TEST(MSR_IA32_SYSENTER_CS, 0x1234, 0, NONE), + /* + * SYSENTER_{ESP,EIP} are technically non-canonical on Intel, + * but KVM doesn't emulate that behavior on emulated writes, + * i.e. this test will observe different behavior if the MSR + * writes are handed by hardware vs. KVM. KVM's behavior is + * intended (though far from ideal), so don't bother testing + * non-canonical values. + */ + MSR_TEST(MSR_IA32_SYSENTER_ESP, canonical_val, 0, NONE), + MSR_TEST(MSR_IA32_SYSENTER_EIP, canonical_val, 0, NONE), + + MSR_TEST_CANONICAL(MSR_FS_BASE, LM), + MSR_TEST_CANONICAL(MSR_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_KERNEL_GS_BASE, LM), + MSR_TEST_CANONICAL(MSR_LSTAR, LM), + MSR_TEST_CANONICAL(MSR_CSTAR, LM), + MSR_TEST(MSR_SYSCALL_MASK, 0xffffffff, 0, LM), + + MSR_TEST2(MSR_IA32_S_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_S_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST2(MSR_IA32_U_CET, CET_SHSTK_EN, CET_RESERVED, SHSTK, IBT), + MSR_TEST2(MSR_IA32_U_CET, CET_ENDBR_EN, CET_RESERVED, IBT, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL0_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL0_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL1_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL1_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL2_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL2_SSP, canonical_val, canonical_val | 1, SHSTK), + MSR_TEST_CANONICAL(MSR_IA32_PL3_SSP, SHSTK), + MSR_TEST(MSR_IA32_PL3_SSP, canonical_val, canonical_val | 1, SHSTK), + + MSR_TEST_KVM(GUEST_SSP, canonical_val, NONCANONICAL, SHSTK), + }; + + const struct kvm_x86_cpu_feature feat_none = X86_FEATURE_NONE; + const struct kvm_x86_cpu_feature feat_lm = X86_FEATURE_LM; + + /* + * Create three vCPUs, but run them on the same task, to validate KVM's + * context switching of MSR state. Don't pin the task to a pCPU to + * also validate KVM's handling of cross-pCPU migration. Use the full + * set of features for the first two vCPUs, but clear all features in + * third vCPU in order to test both positive and negative paths. + */ + const int NR_VCPUS = 3; + struct kvm_vcpu *vcpus[NR_VCPUS]; + struct kvm_vm *vm; + int i; + + kvm_static_assert(sizeof(__msrs) <= sizeof(msrs)); + kvm_static_assert(ARRAY_SIZE(__msrs) <= ARRAY_SIZE(msrs)); + memcpy(msrs, __msrs, sizeof(__msrs)); + + ignore_unsupported_msrs = kvm_is_ignore_msrs(); + + vm = vm_create_with_vcpus(NR_VCPUS, guest_main, vcpus); + + sync_global_to_guest(vm, msrs); + sync_global_to_guest(vm, ignore_unsupported_msrs); + + /* + * Clear features in the "unsupported features" vCPU. This needs to be + * done before the first vCPU run as KVM's ABI is that guest CPUID is + * immutable once the vCPU has been run. + */ + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + /* + * Don't clear LM; selftests are 64-bit only, and KVM doesn't + * honor LM=0 for MSRs that are supposed to exist if and only + * if the vCPU is a 64-bit model. Ditto for NONE; clearing a + * fake feature flag will result in false failures. + */ + if (memcmp(&msrs[idx].feature, &feat_lm, sizeof(feat_lm)) && + memcmp(&msrs[idx].feature, &feat_none, sizeof(feat_none))) + vcpu_clear_cpuid_feature(vcpus[2], msrs[idx].feature); + } + + for (idx = 0; idx < ARRAY_SIZE(__msrs); idx++) { + struct kvm_msr *msr = &msrs[idx]; + + if (msr->is_kvm_defined) { + for (i = 0; i < NR_VCPUS; i++) + host_test_kvm_reg(vcpus[i]); + continue; + } + + /* + * Verify KVM_GET_SUPPORTED_CPUID and KVM_GET_MSR_INDEX_LIST + * are consistent with respect to MSRs whose existence is + * enumerated via CPUID. Skip the check for FS/GS.base MSRs, + * as they aren't reported in the save/restore list since their + * state is managed via SREGS. + */ + TEST_ASSERT(msr->index == MSR_FS_BASE || msr->index == MSR_GS_BASE || + kvm_msr_is_in_save_restore_list(msr->index) == + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)), + "%s %s in save/restore list, but %s according to CPUID", msr->name, + kvm_msr_is_in_save_restore_list(msr->index) ? "is" : "isn't", + (kvm_cpu_has(msr->feature) || kvm_cpu_has(msr->feature2)) ? + "supported" : "unsupported"); + + sync_global_to_guest(vm, idx); + + vcpus_run(vcpus, NR_VCPUS); + vcpus_run(vcpus, NR_VCPUS); + } + + kvm_vm_free(vm); +} + +int main(void) +{ + has_one_reg = kvm_has_cap(KVM_CAP_ONE_REG); + + test_msrs(); + + if (has_one_reg) { + use_one_reg = true; + test_msrs(); + } +} diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c index dad988351493..761fec293408 100644 --- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/nested_close_kvm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_close_while_nested - * * Copyright (C) 2019, Red Hat, Inc. * * Verify that nothing bad happens if a KVM user exits with open @@ -12,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -22,6 +21,8 @@ enum { PORT_L0_EXIT = 0x2000, }; +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(void) { /* Exit to L0 */ @@ -29,9 +30,8 @@ static void l2_guest_code(void) : : [port] "d" (PORT_L0_EXIT) : "rax"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_vmx_code(struct vmx_pages *vmx_pages) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); @@ -45,19 +45,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(0); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Prepare the VMCB for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(0); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + gva_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); for (;;) { volatile struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c new file mode 100644 index 000000000000..0e67cce83570 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "vmx.h" + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 + +/* + * Allocate four pages total. Two pages are used to verify that the KVM marks + * the accessed page/GFN as marked dirty, but not the "other" page. Times two + * so that each "normal" page can be accessed from L2 via an aliased L2 GVA+GPA + * (when TDP is enabled), to verify KVM marks _L1's_ page/GFN as dirty (to + * detect failures, L2 => L1 GPAs can't be identity mapped in the TDP page + * tables, as marking L2's GPA dirty would get a false pass if L1 == L2). + */ +#define TEST_MEM_PAGES 4 + +#define TEST_MEM_BASE 0xc0000000 +#define TEST_MEM_ALIAS_BASE 0xc0002000 + +#define TEST_GUEST_ADDR(base, idx) ((base) + (idx) * PAGE_SIZE) + +#define TEST_GVA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) +#define TEST_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_BASE, idx) + +#define TEST_ALIAS_GPA(idx) TEST_GUEST_ADDR(TEST_MEM_ALIAS_BASE, idx) + +#define TEST_HVA(vm, idx) addr_gpa2hva(vm, TEST_GPA(idx)) + +#define L2_GUEST_STACK_SIZE 64 + +/* Use the page offset bits to communicate the access+fault type. */ +#define TEST_SYNC_READ_FAULT BIT(0) +#define TEST_SYNC_WRITE_FAULT BIT(1) +#define TEST_SYNC_NO_FAULT BIT(2) + +static void l2_guest_code(gva_t base) +{ + gva_t page0 = TEST_GUEST_ADDR(base, 0); + gva_t page1 = TEST_GUEST_ADDR(base, 1); + + READ_ONCE(*(u64 *)page0); + GUEST_SYNC(page0 | TEST_SYNC_READ_FAULT); + WRITE_ONCE(*(u64 *)page0, 1); + GUEST_SYNC(page0 | TEST_SYNC_WRITE_FAULT); + READ_ONCE(*(u64 *)page0); + GUEST_SYNC(page0 | TEST_SYNC_NO_FAULT); + + WRITE_ONCE(*(u64 *)page1, 1); + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); + WRITE_ONCE(*(u64 *)page1, 1); + GUEST_SYNC(page1 | TEST_SYNC_WRITE_FAULT); + READ_ONCE(*(u64 *)page1); + GUEST_SYNC(page1 | TEST_SYNC_NO_FAULT); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +static void l2_guest_code_tdp_enabled(void) +{ + /* + * Use the aliased virtual addresses when running with TDP to verify + * that KVM correctly handles the case where a page is dirtied via a + * different GPA than would be used by L1. + */ + l2_guest_code(TEST_MEM_ALIAS_BASE); +} + +static void l2_guest_code_tdp_disabled(void) +{ + /* + * Use the "normal" virtual addresses when running without TDP enabled, + * in which case L2 will use the same page tables as L1, and thus needs + * to use the same virtual addresses that are mapped into L1. + */ + l2_guest_code(TEST_MEM_BASE); +} + +void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + if (vmx->eptp_gpa) + l2_rip = l2_guest_code_tdp_enabled; + else + l2_rip = l2_guest_code_tdp_disabled; + + prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + if (svm->ncr3_gpa) + l2_rip = l2_guest_code_tdp_enabled; + else + l2_rip = l2_guest_code_tdp_disabled; + + generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(TEST_SYNC_NO_FAULT); + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_SYNC(TEST_SYNC_NO_FAULT); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +static void test_handle_ucall_sync(struct kvm_vm *vm, u64 arg, + unsigned long *bmap) +{ + gva_t gva = arg & ~(PAGE_SIZE - 1); + int page_nr, i; + + /* + * Extract the page number of underlying physical page, which is also + * the _L1_ page number. The dirty bitmap _must_ be updated based on + * the L1 GPA, not L2 GPA, i.e. whether or not L2 used an aliased GPA + * (i.e. if TDP enabled for L2) is irrelevant with respect to the dirty + * bitmap and which underlying physical page is accessed. + * + * Note, gva will be '0' if there was no access, i.e. if the purpose of + * the sync is to verify all pages are clean. + */ + if (!gva) + page_nr = 0; + else if (gva >= TEST_MEM_ALIAS_BASE) + page_nr = (gva - TEST_MEM_ALIAS_BASE) >> PAGE_SHIFT; + else + page_nr = (gva - TEST_MEM_BASE) >> PAGE_SHIFT; + TEST_ASSERT(page_nr == 0 || page_nr == 1, + "Test bug, unexpected frame number '%u' for arg = %lx", page_nr, arg); + TEST_ASSERT(gva || (arg & TEST_SYNC_NO_FAULT), + "Test bug, gva must be valid if a fault is expected"); + + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + /* + * Check all pages to verify the correct physical page was modified (or + * not), and that all pages are clean/dirty as expected. + * + * If a fault of any kind is expected, the target page should be dirty + * as the Dirty bit is set in the gPTE. KVM should create a writable + * SPTE even on a read fault, *and* KVM must mark the GFN as dirty + * when doing so. + */ + for (i = 0; i < TEST_MEM_PAGES; i++) { + if (i == page_nr && (arg & TEST_SYNC_WRITE_FAULT)) + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 1, + "Page %u incorrectly not written by guest", i); + else + TEST_ASSERT(*(u64 *)TEST_HVA(vm, i) == 0xaaaaaaaaaaaaaaaaULL, + "Page %u incorrectly written by guest", i); + + if (i == page_nr && !(arg & TEST_SYNC_NO_FAULT)) + TEST_ASSERT(test_bit(i, bmap), + "Page %u incorrectly reported clean on %s fault", + i, arg & TEST_SYNC_READ_FAULT ? "read" : "write"); + else + TEST_ASSERT(!test_bit(i, bmap), + "Page %u incorrectly reported dirty", i); + } +} + +static void test_dirty_log(bool nested_tdp) +{ + gva_t nested_gva = 0; + unsigned long *bmap; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool done = false; + + pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled"); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (nested_tdp) + vm_enable_tdp(vm); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); + + vcpu_args_set(vcpu, 1, nested_gva); + + /* Add an extra memory slot for testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_BASE, + TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0004000). This + * affects both L1 and L2. However... + */ + virt_map(vm, TEST_MEM_BASE, TEST_MEM_BASE, TEST_MEM_PAGES); + + /* + * ... pages in the L2 GPA address range [0xc0002000, 0xc0004000) will + * map to [0xc0000000, 0xc0002000) when TDP is enabled (for L2). + * + * When TDP is disabled, the L2 guest code will still access the same L1 + * GPAs as the TDP enabled case. + * + * Set the Dirty bit in the PTEs used by L2 so that KVM will create + * writable SPTEs when handling read faults (if the Dirty bit isn't + * set, KVM must intercept the next write to emulate the Dirty bit + * update). + */ + if (nested_tdp) { + tdp_identity_map_default_memslots(vm); + tdp_map(vm, TEST_ALIAS_GPA(0), TEST_GPA(0), PAGE_SIZE); + tdp_map(vm, TEST_ALIAS_GPA(1), TEST_GPA(1), PAGE_SIZE); + + *tdp_get_pte(vm, TEST_ALIAS_GPA(0)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); + *tdp_get_pte(vm, TEST_ALIAS_GPA(1)) |= PTE_DIRTY_MASK(&vm->stage2_mmu); + } else { + *vm_get_pte(vm, TEST_GVA(0)) |= PTE_DIRTY_MASK(&vm->mmu); + *vm_get_pte(vm, TEST_GVA(1)) |= PTE_DIRTY_MASK(&vm->mmu); + } + + bmap = bitmap_zalloc(TEST_MEM_PAGES); + + while (!done) { + memset(TEST_HVA(vm, 0), 0xaa, TEST_MEM_PAGES * PAGE_SIZE); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + test_handle_ucall_sync(vm, uc.args[1], bmap); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); + + test_dirty_log(/*nested_tdp=*/false); + + if (kvm_cpu_has_tdp()) + test_dirty_log(/*nested_tdp=*/true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c new file mode 100644 index 000000000000..fb7dcbe53ac7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" +#include "svm_util.h" + +enum { + SVM_F, + VMX_F, + NR_VIRTUALIZATION_FLAVORS, +}; + +struct emulated_instruction { + const char name[32]; + u8 opcode[15]; + u32 exit_reason[NR_VIRTUALIZATION_FLAVORS]; +}; + +static struct emulated_instruction instructions[] = { + { + .name = "pause", + .opcode = { 0xf3, 0x90 }, + .exit_reason = { SVM_EXIT_PAUSE, + EXIT_REASON_PAUSE_INSTRUCTION, } + }, + { + .name = "hlt", + .opcode = { 0xf4 }, + .exit_reason = { SVM_EXIT_HLT, + EXIT_REASON_HLT, } + }, +}; + +static u8 kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */ +static u8 l2_guest_code[sizeof(kvm_fep) + 15]; +static u8 *l2_instruction = &l2_guest_code[sizeof(kvm_fep)]; + +static u32 get_instruction_length(struct emulated_instruction *insn) +{ + u32 i; + + for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++) + ; + + return i; +} + +static void guest_code(void *test_data) +{ + int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F; + int i; + + memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep)); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, NULL, NULL); + vmcb->save.idtr.limit = 0; + vmcb->save.rip = (u64)l2_guest_code; + + vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) | + BIT_ULL(INTERCEPT_PAUSE) | + BIT_ULL(INTERCEPT_HLT); + vmcb->control.intercept_exceptions = 0; + } else { + GUEST_ASSERT(prepare_for_vmx_operation(test_data)); + GUEST_ASSERT(load_vmcs(test_data)); + + prepare_vmcs(test_data, NULL, NULL); + GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0)); + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0)); + + vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | + CPU_BASED_PAUSE_EXITING | + CPU_BASED_HLT_EXITING); + } + + for (i = 0; i < ARRAY_SIZE(instructions); i++) { + struct emulated_instruction *insn = &instructions[i]; + u32 insn_len = get_instruction_length(insn); + u32 exit_insn_len; + u32 exit_reason; + + /* + * Copy the target instruction to the L2 code stream, and fill + * the remaining bytes with INT3s so that a missed intercept + * results in a consistent failure mode (SHUTDOWN). + */ + memcpy(l2_instruction, insn->opcode, insn_len); + memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len); + + if (f == SVM_F) { + struct svm_test_data *svm = test_data; + struct vmcb *vmcb = svm->vmcb; + + run_guest(vmcb, svm->vmcb_gpa); + exit_reason = vmcb->control.exit_code; + exit_insn_len = vmcb->control.next_rip - vmcb->save.rip; + GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction); + } else { + GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0); + exit_reason = vmreadz(VM_EXIT_REASON); + exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction); + } + + __GUEST_ASSERT(exit_reason == insn->exit_reason[f], + "Wanted exit_reason '0x%x' for '%s', got '0x%x'", + insn->exit_reason[f], insn->name, exit_reason); + + __GUEST_ASSERT(exit_insn_len == insn_len, + "Wanted insn_len '%u' for '%s', got '%u'", + insn_len, insn->name, exit_insn_len); + } + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + gva_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_forced_emulation_enabled); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul); + + if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c index 3641a42934ac..186e980aa8ee 100644 --- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c @@ -72,7 +72,7 @@ static void l2_ss_injected_tf_test(void) } static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector, - uint32_t error_code) + u32 error_code) { struct vmcb *vmcb = svm->vmcb; struct vmcb_control_area *ctrl = &vmcb->control; @@ -111,7 +111,7 @@ static void l1_svm_code(struct svm_test_data *svm) GUEST_DONE(); } -static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code) +static void vmx_run_l2(void *l2_code, int vector, u32 error_code) { GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code)); @@ -216,7 +216,7 @@ static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject) */ int main(int argc, char *argv[]) { - vm_vaddr_t nested_test_data_gva; + gva_t nested_test_data_gva; struct kvm_vcpu_events events; struct kvm_vcpu *vcpu; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c new file mode 100644 index 000000000000..11fd2467d823 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * This test verifies that L1 fails to enter L2 with an invalid CR3, and + * succeeds otherwise. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + vmcall(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = svm->vmcb->save.cr3; + svm->vmcb->save.cr3 = -1ull; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR); + + /* Now restore CR3 and make sure L2 runs successfully */ + svm->vmcb->save.cr3 = save_cr3; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + + /* Now restore CR3 and make sure L2 runs successfully */ + vmwrite(GUEST_CR3, save_cr3); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + gva_t guest_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/nested_set_state_test.c index 67a62a5a8895..831380732671 100644 --- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/nested_set_state_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_set_nested_state_test - * * Copyright (C) 2019, Google LLC. * * This test verifies the integrity of calling the ioctl KVM_SET_NESTED_STATE. @@ -11,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <errno.h> #include <linux/kvm.h> @@ -241,8 +240,108 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, "Size must be between %ld and %d. The size returned was %d.", sizeof(*state), state_sz, state->size); - TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull."); - TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull."); + + TEST_ASSERT_EQ(state->hdr.vmx.vmxon_pa, -1ull); + TEST_ASSERT_EQ(state->hdr.vmx.vmcs12_pa, -1ull); + TEST_ASSERT_EQ(state->flags, 0); + + free(state); +} + +static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu) +{ + u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER); + + vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME); +} + +static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu) +{ + u64 old_efer = vcpu_get_msr(vcpu, MSR_EFER); + + vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME); +} + +void set_default_svm_state(struct kvm_nested_state *state, int size) +{ + memset(state, 0, size); + state->format = 1; + state->size = size; + state->hdr.svm.vmcb_pa = 0x3000; +} + +void test_svm_nested_state(struct kvm_vcpu *vcpu) +{ + /* Add a page for VMCB. */ + const int state_sz = sizeof(struct kvm_nested_state) + getpagesize(); + struct kvm_nested_state *state = + (struct kvm_nested_state *)malloc(state_sz); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM); + + /* The format must be set to 1. 0 for VMX, 1 for SVM. */ + set_default_svm_state(state, state_sz); + state->format = 0; + test_nested_state_expect_einval(vcpu, state); + + /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */ + set_default_svm_state(state, state_sz); + state->flags = KVM_STATE_NESTED_EVMCS; + test_nested_state_expect_einval(vcpu, state); + + /* + * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or + * cleared. + */ + vcpu_efer_disable_svm(vcpu); + + set_default_svm_state(state, state_sz); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + state->flags = 0; + test_nested_state(vcpu, state); + + state->flags = KVM_STATE_NESTED_GIF_SET; + test_nested_state(vcpu, state); + + /* Enable SVM in the guest EFER. */ + vcpu_efer_enable_svm(vcpu); + + /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */ + set_default_svm_state(state, state_sz); + state->hdr.svm.vmcb_pa = -1ull; + state->flags = 0; + test_nested_state(vcpu, state); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * Size must be large enough to fit kvm_nested_state and VMCB + * only when entering guest mode. + */ + set_default_svm_state(state, state_sz/2); + state->flags = 0; + test_nested_state(vcpu, state); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * Test that if we leave nesting the state reflects that when we get it + * again, except for vmcb_pa, which is always returned as 0 when not in + * guest mode. + */ + set_default_svm_state(state, state_sz); + state->hdr.svm.vmcb_pa = -1ull; + state->flags = KVM_STATE_NESTED_GIF_SET; + test_nested_state(vcpu, state); + vcpu_nested_state_get(vcpu, state); + TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, + "Size must be between %ld and %d. The size returned was %d.", + sizeof(*state), state_sz, state->size); + + TEST_ASSERT_EQ(state->hdr.svm.vmcb_pa, 0); + TEST_ASSERT_EQ(state->flags, KVM_STATE_NESTED_GIF_SET); free(state); } @@ -255,20 +354,20 @@ int main(int argc, char *argv[]) have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); - /* - * AMD currently does not implement set_nested_state, so for now we - * just early out. - */ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_with_one_vcpu(&vcpu, NULL); /* - * First run tests with VMX disabled to check error handling. + * First run tests with VMX/SVM disabled to check error handling. + * test_{vmx/svm}_nested_state() will re-enable as needed. */ - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); + else + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM); /* Passing a NULL kvm_nested_state causes a EFAULT. */ test_nested_state_expect_efault(vcpu, NULL); @@ -297,7 +396,10 @@ int main(int argc, char *argv[]) state.flags = KVM_STATE_NESTED_RUN_PENDING; test_nested_state_expect_einval(vcpu, &state); - test_vmx_nested_state(vcpu); + if (kvm_cpu_has(X86_FEATURE_VMX)) + test_vmx_nested_state(vcpu); + else + test_svm_nested_state(vcpu); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2ceb5c78c442..f0e4adac4751 100644 --- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_tsc_adjust_test - * * Copyright (C) 2018, Google LLC. * * IA32_TSC_ADJUST test @@ -22,6 +20,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include <string.h> #include <sys/ioctl.h> @@ -35,6 +34,8 @@ #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) +#define L2_GUEST_STACK_SIZE 64 + enum { PORT_ABORT = 0x1000, PORT_REPORT, @@ -52,9 +53,9 @@ enum { /* The virtual machine object. */ static struct kvm_vm *vm; -static void check_ia32_tsc_adjust(int64_t max) +static void check_ia32_tsc_adjust(s64 max) { - int64_t adjust; + s64 adjust; adjust = rdmsr(MSR_IA32_TSC_ADJUST); GUEST_SYNC(adjust); @@ -63,7 +64,7 @@ static void check_ia32_tsc_adjust(int64_t max) static void l2_guest_code(void) { - uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + u64 l1_tsc = rdtsc() - TSC_OFFSET_VALUE; wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); @@ -72,46 +73,51 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_guest_code(void *data) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; - uintptr_t save_cr3; + /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + /* + * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not + * intercepting the write so it should update L1's TSC_ADJUST. + */ + if (this_cpu_has(X86_FEATURE_VMX)) { + struct vmx_pages *vmx_pages = data; + u32 control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + } else { + struct svm_test_data *svm = data; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + } check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - GUEST_DONE(); } -static void report(int64_t val) +static void report(s64 val) { pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); @@ -119,16 +125,19 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + gva_t nested_gva; struct kvm_vcpu *vcpu; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (;;) { struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index 1759fa5cb3f2..190e93af20a1 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -13,12 +13,13 @@ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" /* L2 is scaled up (from L1's perspective) by this factor */ #define L2_SCALE_FACTOR 4ULL -#define TSC_OFFSET_L2 ((uint64_t) -33125236320908) +#define TSC_OFFSET_L2 ((u64)-33125236320908) #define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) #define L2_GUEST_STACK_SIZE 64 @@ -34,9 +35,9 @@ enum { USLEEP, UCHECK_L1, UCHECK_L2 }; * measurements, a difference of 1% between the actual and the expected value * is tolerated. */ -static void compare_tsc_freq(uint64_t actual, uint64_t expected) +static void compare_tsc_freq(u64 actual, u64 expected) { - uint64_t tolerance, thresh_low, thresh_high; + u64 tolerance, thresh_low, thresh_high; tolerance = expected / 100; thresh_low = expected - tolerance; @@ -54,7 +55,7 @@ static void compare_tsc_freq(uint64_t actual, uint64_t expected) static void check_tsc_freq(int level) { - uint64_t tsc_start, tsc_end, tsc_freq; + u64 tsc_start, tsc_end, tsc_freq; /* * Reading the TSC twice with about a second's difference should give @@ -79,10 +80,33 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_svm_code(struct svm_test_data *svm) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC scaling for L2 */ + wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32); + + /* launch L2 */ + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u32 control; /* check that L1's frequency looks alright before launching L2 */ check_tsc_freq(UCHECK_L1); @@ -116,20 +140,29 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; + gva_t guest_gva = 0; - uint64_t tsc_start, tsc_end; - uint64_t tsc_khz; - uint64_t l1_scale_factor; - uint64_t l0_tsc_freq = 0; - uint64_t l1_tsc_freq = 0; - uint64_t l2_tsc_freq = 0; + u64 tsc_start, tsc_end; + u64 tsc_khz; + u64 l1_scale_factor; + u64 l0_tsc_freq = 0; + u64 l1_tsc_freq = 0; + u64 l2_tsc_freq = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); @@ -152,8 +185,13 @@ int main(int argc, char *argv[]) printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); diff --git a/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c new file mode 100644 index 000000000000..85d3f4cc76f3 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_vmsave_vmload_test.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2026, Google LLC. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + +/* + * Allocate two VMCB pages for testing. Both pages have different GVAs (shared + * by both L1 and L2) and L1 GPAs. A single L2 GPA is used such that: + * - L2 GPA == L1 GPA for VMCB0. + * - L2 GPA is mapped to L1 GPA for VMCB1 using NPT in L1. + * + * This allows testing whether the GPA used by VMSAVE/VMLOAD in L2 is + * interpreted as a direct L1 GPA or translated using NPT as an L2 GPA, depends + * on which VMCB is accessed. + */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_PAGES 2 +#define TEST_MEM_BASE 0xc0000000 + +#define TEST_GUEST_ADDR(idx) (TEST_MEM_BASE + (idx) * PAGE_SIZE) + +#define TEST_VMCB_L1_GPA(idx) TEST_GUEST_ADDR(idx) +#define TEST_VMCB_GVA(idx) TEST_GUEST_ADDR(idx) + +#define TEST_VMCB_L2_GPA TEST_VMCB_L1_GPA(0) + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code_vmsave(void) +{ + asm volatile("vmsave %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); +} + +static void l2_guest_code_vmload(void) +{ + asm volatile("vmload %0" : : "a"(TEST_VMCB_L2_GPA) : "memory"); +} + +static void l2_guest_code_vmcb(int vmcb_idx) +{ + wrmsr(MSR_KERNEL_GS_BASE, 0xaaaa); + l2_guest_code_vmsave(); + + /* Verify the VMCB used by VMSAVE and update KERNEL_GS_BASE to 0xbbbb */ + GUEST_SYNC(vmcb_idx); + + l2_guest_code_vmload(); + GUEST_ASSERT_EQ(rdmsr(MSR_KERNEL_GS_BASE), 0xbbbb); + + /* Reset MSR_KERNEL_GS_BASE */ + wrmsr(MSR_KERNEL_GS_BASE, 0); + l2_guest_code_vmsave(); + + vmmcall(); +} + +static void l2_guest_code_vmcb0(void) +{ + l2_guest_code_vmcb(0); +} + +static void l2_guest_code_vmcb1(void) +{ + l2_guest_code_vmcb(1); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Each test case initializes the guest RIP below */ + generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Set VMSAVE/VMLOAD intercepts and make sure they work with.. */ + svm->vmcb->control.intercept |= (BIT_ULL(INTERCEPT_VMSAVE) | + BIT_ULL(INTERCEPT_VMLOAD)); + + /* ..SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE cleared.. */ + svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE; + + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); + + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); + + /* ..and SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE set */ + svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE; + + svm->vmcb->save.rip = (u64)l2_guest_code_vmsave; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMSAVE); + + svm->vmcb->save.rip = (u64)l2_guest_code_vmload; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMLOAD); + + /* Now clear the intercepts to test VMSAVE/VMLOAD behavior */ + svm->vmcb->control.intercept &= ~(BIT_ULL(INTERCEPT_VMSAVE) | + BIT_ULL(INTERCEPT_VMLOAD)); + + /* + * Without SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be + * interpreted as an L1 GPA, so VMCB0 should be used. + */ + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb0; + svm->vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + + /* + * With SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE, the GPA will be interpeted as + * an L2 GPA, and translated through the NPT to VMCB1. + */ + svm->vmcb->save.rip = (u64)l2_guest_code_vmcb1; + svm->vmcb->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + gva_t nested_gva = 0; + struct vmcb *test_vmcb[2]; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_NPT)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_enable_tdp(vm); + + vcpu_alloc_svm(vm, &nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + TEST_MEM_BASE, TEST_MEM_SLOT_INDEX, + TEST_MEM_PAGES, 0); + + for (i = 0; i <= 1; i++) { + virt_map(vm, TEST_VMCB_GVA(i), TEST_VMCB_L1_GPA(i), 1); + test_vmcb[i] = (struct vmcb *)addr_gva2hva(vm, TEST_VMCB_GVA(i)); + } + + tdp_identity_map_default_memslots(vm); + + /* + * L2 GPA == L1_GPA(0), but map it to L1_GPA(1), to allow testing + * whether the L2 GPA is interpreted as an L1 GPA or translated through + * the NPT. + */ + TEST_ASSERT_EQ(TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(0)); + tdp_map(vm, TEST_VMCB_L2_GPA, TEST_VMCB_L1_GPA(1), PAGE_SIZE); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + i = uc.args[1]; + TEST_ASSERT(i == 0 || i == 1, "Unexpected VMCB idx: %d", i); + + /* + * Check that only the expected VMCB has KERNEL_GS_BASE + * set to 0xaaaa, and update it to 0xbbbb. + */ + TEST_ASSERT_EQ(test_vmcb[i]->save.kernel_gs_base, 0xaaaa); + TEST_ASSERT_EQ(test_vmcb[1-i]->save.kernel_gs_base, 0); + test_vmcb[i]->save.kernel_gs_base = 0xbbbb; + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c index e7efb2b35f8b..70950067b989 100644 --- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c @@ -32,7 +32,7 @@ #define RETURN_OPCODE 0xC3 /* Call the specified memory address. */ -static void guest_do_CALL(uint64_t target) +static void guest_do_CALL(u64 target) { ((void (*)(void)) target)(); } @@ -46,14 +46,14 @@ static void guest_do_CALL(uint64_t target) */ void guest_code(void) { - uint64_t hpage_1 = HPAGE_GVA; - uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512); - uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512); + u64 hpage_1 = HPAGE_GVA; + u64 hpage_2 = hpage_1 + (PAGE_SIZE * 512); + u64 hpage_3 = hpage_2 + (PAGE_SIZE * 512); - READ_ONCE(*(uint64_t *)hpage_1); + READ_ONCE(*(u64 *)hpage_1); GUEST_SYNC(1); - READ_ONCE(*(uint64_t *)hpage_2); + READ_ONCE(*(u64 *)hpage_2); GUEST_SYNC(2); guest_do_CALL(hpage_1); @@ -62,10 +62,10 @@ void guest_code(void) guest_do_CALL(hpage_3); GUEST_SYNC(4); - READ_ONCE(*(uint64_t *)hpage_1); + READ_ONCE(*(u64 *)hpage_1); GUEST_SYNC(5); - READ_ONCE(*(uint64_t *)hpage_3); + READ_ONCE(*(u64 *)hpage_3); GUEST_SYNC(6); } @@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) { int actual_pages_2m; - actual_pages_2m = vm_get_stat(vm, "pages_2m"); + actual_pages_2m = vm_get_stat(vm, pages_2m); TEST_ASSERT(actual_pages_2m == expected_pages_2m, "Unexpected 2m page count. Expected %d, got %d", @@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits) { int actual_splits; - actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + actual_splits = vm_get_stat(vm, nx_lpage_splits); TEST_ASSERT(actual_splits == expected_splits, "Unexpected NX huge page split count. Expected %d, got %d", @@ -107,7 +107,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t nr_bytes; + u64 nr_bytes; void *hva; int r; diff --git a/tools/testing/selftests/kvm/x86/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c index 9cbf283ebc55..80bb07e6531c 100644 --- a/tools/testing/selftests/kvm/x86/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86/platform_info_test.c @@ -23,8 +23,8 @@ static void guest_code(void) { - uint64_t msr_platform_info; - uint8_t vector; + u64 msr_platform_info; + u8 vector; GUEST_SYNC(true); msr_platform_info = rdmsr(MSR_PLATFORM_INFO); @@ -42,7 +42,7 @@ int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t msr_platform_info; + u64 msr_platform_info; struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c index 698cb36989db..dc6afac3aa91 100644 --- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c @@ -14,10 +14,10 @@ #define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS) /* - * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, - * 1 LOOP. + * Number of instructions in each loop. 1 ENTER, 1 CLFLUSH/CLFLUSHOPT/NOP, + * 1 MFENCE, 1 MOV, 1 LEAVE, 1 LOOP. */ -#define NUM_INSNS_PER_LOOP 3 +#define NUM_INSNS_PER_LOOP 6 /* * Number of "extra" instructions that will be counted, i.e. the number of @@ -29,19 +29,74 @@ /* Total number of instructions retired within the measured section. */ #define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS) +/* Track which architectural events are supported by hardware. */ +static u32 hardware_pmu_arch_events; -static uint8_t kvm_pmu_version; +static u8 kvm_pmu_version; static bool kvm_has_perf_caps; +#define X86_PMU_FEATURE_NULL \ +({ \ + struct kvm_x86_pmu_feature feature = {}; \ + \ + feature; \ +}) + +static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) +{ + return !(*(u64 *)&event); +} + +struct kvm_intel_pmu_event { + struct kvm_x86_pmu_feature gp_event; + struct kvm_x86_pmu_feature fixed_event; +}; + +/* + * Wrap the array to appease the compiler, as the macros used to construct each + * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the + * compiler often thinks the feature definitions aren't compile-time constants. + */ +static struct kvm_intel_pmu_event intel_event_to_feature(u8 idx) +{ + const struct kvm_intel_pmu_event __intel_event_to_feature[] = { + [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, + [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, + /* + * Note, the fixed counter for reference cycles is NOT the same as the + * general purpose architectural event. The fixed counter explicitly + * counts at the same frequency as the TSC, whereas the GP event counts + * at a fixed, but uarch specific, frequency. Bundle them here for + * simplicity. + */ + [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, + [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, + [INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_BAD_SPEC_INDEX] = { X86_PMU_FEATURE_TOPDOWN_BAD_SPEC, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX] = { X86_PMU_FEATURE_TOPDOWN_FE_BOUND, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_TOPDOWN_RETIRING_INDEX] = { X86_PMU_FEATURE_TOPDOWN_RETIRING, X86_PMU_FEATURE_NULL }, + [INTEL_ARCH_LBR_INSERTS_INDEX] = { X86_PMU_FEATURE_LBR_INSERTS, X86_PMU_FEATURE_NULL }, + }; + + kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS); + + return __intel_event_to_feature[idx]; +} + static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, - uint8_t pmu_version, - uint64_t perf_capabilities) + u8 pmu_version, + u64 perf_capabilities) { struct kvm_vm *vm; vm = vm_create_with_one_vcpu(vcpu, guest_code); sync_global_to_guest(vm, kvm_pmu_version); + sync_global_to_guest(vm, hardware_pmu_arch_events); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -77,7 +132,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu) } while (uc.cmd != UCALL_DONE); } -static uint8_t guest_get_pmu_version(void) +static u8 guest_get_pmu_version(void) { /* * Return the effective PMU version, i.e. the minimum between what KVM @@ -86,7 +141,7 @@ static uint8_t guest_get_pmu_version(void) * supported by KVM to verify KVM doesn't freak out and do something * bizarre with an architecturally valid, but unsupported, version. */ - return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); + return min_t(u8, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION)); } /* @@ -98,22 +153,28 @@ static uint8_t guest_get_pmu_version(void) * Sanity check that in all cases, the event doesn't count when it's disabled, * and that KVM correctly emulates the write of an arbitrary value. */ -static void guest_assert_event_count(uint8_t idx, - struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr) +static void guest_assert_event_count(u8 idx, u32 pmc, u32 pmc_msr) { - uint64_t count; + u64 count; count = _rdpmc(pmc); - if (!this_pmu_has(event)) + if (!(hardware_pmu_arch_events & BIT(idx))) goto sanity_checks; switch (idx) { case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(INSTRUCTIONS_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED); break; case INTEL_ARCH_BRANCHES_RETIRED_INDEX: - GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); + /* Relax precise count check due to VM-EXIT/VM-ENTRY overcount issue */ + if (this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT)) + GUEST_ASSERT(count >= NUM_BRANCH_INSNS_RETIRED); + else + GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED); break; case INTEL_ARCH_LLC_REFERENCES_INDEX: case INTEL_ARCH_LLC_MISSES_INDEX: @@ -123,10 +184,15 @@ static void guest_assert_event_count(uint8_t idx, fallthrough; case INTEL_ARCH_CPU_CYCLES_INDEX: case INTEL_ARCH_REFERENCE_CYCLES_INDEX: + case INTEL_ARCH_TOPDOWN_BE_BOUND_INDEX: + case INTEL_ARCH_TOPDOWN_FE_BOUND_INDEX: GUEST_ASSERT_NE(count, 0); break; case INTEL_ARCH_TOPDOWN_SLOTS_INDEX: - GUEST_ASSERT(count >= NUM_INSNS_RETIRED); + case INTEL_ARCH_TOPDOWN_RETIRING_INDEX: + __GUEST_ASSERT(count >= NUM_INSNS_RETIRED, + "Expected top-down slots >= %u, got count = %lu", + NUM_INSNS_RETIRED, count); break; default: break; @@ -160,83 +226,52 @@ do { \ __asm__ __volatile__("wrmsr\n\t" \ " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \ "1:\n\t" \ + FEP "enter $0, $0\n\t" \ clflush "\n\t" \ "mfence\n\t" \ + "mov %[m], %%eax\n\t" \ + FEP "leave\n\t" \ FEP "loop 1b\n\t" \ FEP "mov %%edi, %%ecx\n\t" \ FEP "xor %%eax, %%eax\n\t" \ FEP "xor %%edx, %%edx\n\t" \ "wrmsr\n\t" \ - :: "a"((uint32_t)_value), "d"(_value >> 32), \ - "c"(_msr), "D"(_msr) \ + :: "a"((u32)_value), "d"(_value >> 32), \ + "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \ ); \ } while (0) -#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ +#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \ do { \ - wrmsr(pmc_msr, 0); \ + wrmsr(_pmc_msr, 0); \ \ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \ - GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \ + GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \ else \ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \ \ - guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \ + guest_assert_event_count(_idx, _pmc, _pmc_msr); \ } while (0) -static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event, - uint32_t pmc, uint32_t pmc_msr, - uint32_t ctrl_msr, uint64_t ctrl_msr_value) +static void __guest_test_arch_event(u8 idx, u32 pmc, u32 pmc_msr, + u32 ctrl_msr, u64 ctrl_msr_value) { - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, ""); if (is_forced_emulation_enabled) - GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); -} - -#define X86_PMU_FEATURE_NULL \ -({ \ - struct kvm_x86_pmu_feature feature = {}; \ - \ - feature; \ -}) - -static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event) -{ - return !(*(u64 *)&event); + GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP); } -static void guest_test_arch_event(uint8_t idx) +static void guest_test_arch_event(u8 idx) { - const struct { - struct kvm_x86_pmu_feature gp_event; - struct kvm_x86_pmu_feature fixed_event; - } intel_event_to_feature[] = { - [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED }, - [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED }, - /* - * Note, the fixed counter for reference cycles is NOT the same - * as the general purpose architectural event. The fixed counter - * explicitly counts at the same frequency as the TSC, whereas - * the GP event counts at a fixed, but uarch specific, frequency. - * Bundle them here for simplicity. - */ - [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED }, - [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL }, - [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED }, - }; - - uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint32_t pmu_version = guest_get_pmu_version(); + u32 nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + u32 pmu_version = guest_get_pmu_version(); /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */ bool guest_has_perf_global_ctrl = pmu_version >= 2; struct kvm_x86_pmu_feature gp_event, fixed_event; - uint32_t base_pmc_msr; + u32 base_pmc_msr; unsigned int i; /* The host side shouldn't invoke this without a guest PMU. */ @@ -248,13 +283,13 @@ static void guest_test_arch_event(uint8_t idx) else base_pmc_msr = MSR_IA32_PERFCTR0; - gp_event = intel_event_to_feature[idx].gp_event; + gp_event = intel_event_to_feature(idx).gp_event; GUEST_ASSERT_EQ(idx, gp_event.f.bit); GUEST_ASSERT(nr_gp_counters); for (i = 0; i < nr_gp_counters; i++) { - uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS | + u64 eventsel = ARCH_PERFMON_EVENTSEL_OS | ARCH_PERFMON_EVENTSEL_ENABLE | intel_pmu_arch_events[idx]; @@ -262,14 +297,14 @@ static void guest_test_arch_event(uint8_t idx) if (guest_has_perf_global_ctrl) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i)); - __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i, + __guest_test_arch_event(idx, i, base_pmc_msr + i, MSR_P6_EVNTSEL0 + i, eventsel); } if (!guest_has_perf_global_ctrl) return; - fixed_event = intel_event_to_feature[idx].fixed_event; + fixed_event = intel_event_to_feature(idx).fixed_event; if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event)) return; @@ -277,7 +312,7 @@ static void guest_test_arch_event(uint8_t idx) wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL)); - __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED, + __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED, MSR_CORE_PERF_FIXED_CTR0 + i, MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i)); @@ -285,7 +320,7 @@ static void guest_test_arch_event(uint8_t idx) static void guest_test_arch_events(void) { - uint8_t i; + u8 i; for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) guest_test_arch_event(i); @@ -293,8 +328,8 @@ static void guest_test_arch_events(void) GUEST_DONE(); } -static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t length, uint8_t unavailable_mask) +static void test_arch_events(u8 pmu_version, u64 perf_capabilities, + u8 length, u32 unavailable_mask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -303,6 +338,9 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, if (!pmu_version) return; + unavailable_mask &= GENMASK(X86_PROPERTY_PMU_EVENTS_MASK.hi_bit, + X86_PROPERTY_PMU_EVENTS_MASK.lo_bit); + vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events, pmu_version, perf_capabilities); @@ -327,19 +365,19 @@ static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities, #define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \ - "Expected %s on " #insn "(0x%x), got vector %u", \ - expect_gp ? "#GP" : "no fault", msr, vector) \ + "Expected %s on " #insn "(0x%x), got %s", \ + expect_gp ? "#GP" : "no fault", msr, ex_str(vector)) \ #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \ - __GUEST_ASSERT(val == expected_val, \ + __GUEST_ASSERT(val == expected, \ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \ - msr, expected_val, val); + msr, expected, val); -static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, - uint64_t expected_val) +static void guest_test_rdpmc(u32 rdpmc_idx, bool expect_success, + u64 expected_val) { - uint8_t vector; - uint64_t val; + u8 vector; + u64 val; vector = rdpmc_safe(rdpmc_idx, &val); GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector); @@ -355,19 +393,19 @@ static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success, GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val); } -static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters, - uint8_t nr_counters, uint32_t or_mask) +static void guest_rd_wr_counters(u32 base_msr, u8 nr_possible_counters, + u8 nr_counters, u32 or_mask) { const bool pmu_has_fast_mode = !guest_get_pmu_version(); - uint8_t i; + u8 i; for (i = 0; i < nr_possible_counters; i++) { /* * TODO: Test a value that validates full-width writes and the * width of the counters. */ - const uint64_t test_val = 0xffff; - const uint32_t msr = base_msr + i; + const u64 test_val = 0xffff; + const u32 msr = base_msr + i; /* * Fixed counters are supported if the counter is less than the @@ -380,12 +418,12 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are * unsupported, i.e. doesn't #GP and reads back '0'. */ - const uint64_t expected_val = expect_success ? test_val : 0; + const u64 expected_val = expect_success ? test_val : 0; const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 && msr != MSR_P6_PERFCTR1; - uint32_t rdpmc_idx; - uint8_t vector; - uint64_t val; + u32 rdpmc_idx; + u8 vector; + u64 val; vector = wrmsr_safe(msr, test_val); GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector); @@ -423,9 +461,9 @@ static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters static void guest_test_gp_counters(void) { - uint8_t pmu_version = guest_get_pmu_version(); - uint8_t nr_gp_counters = 0; - uint32_t base_msr; + u8 pmu_version = guest_get_pmu_version(); + u8 nr_gp_counters = 0; + u32 base_msr; if (pmu_version) nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); @@ -439,7 +477,7 @@ static void guest_test_gp_counters(void) * counters, of which there are none. */ if (pmu_version > 1) { - uint64_t global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); + u64 global_ctrl = rdmsr(MSR_CORE_PERF_GLOBAL_CTRL); if (nr_gp_counters) GUEST_ASSERT_EQ(global_ctrl, GENMASK_ULL(nr_gp_counters - 1, 0)); @@ -457,8 +495,8 @@ static void guest_test_gp_counters(void) GUEST_DONE(); } -static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t nr_gp_counters) +static void test_gp_counters(u8 pmu_version, u64 perf_capabilities, + u8 nr_gp_counters) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -476,9 +514,9 @@ static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void guest_test_fixed_counters(void) { - uint64_t supported_bitmask = 0; - uint8_t nr_fixed_counters = 0; - uint8_t i; + u64 supported_bitmask = 0; + u8 nr_fixed_counters = 0; + u8 i; /* Fixed counters require Architectural vPMU Version 2+. */ if (guest_get_pmu_version() >= 2) @@ -495,8 +533,8 @@ static void guest_test_fixed_counters(void) nr_fixed_counters, supported_bitmask); for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) { - uint8_t vector; - uint64_t val; + u8 vector; + u64 val; if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) { vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL, @@ -523,9 +561,8 @@ static void guest_test_fixed_counters(void) GUEST_DONE(); } -static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, - uint8_t nr_fixed_counters, - uint32_t supported_bitmask) +static void test_fixed_counters(u8 pmu_version, u64 perf_capabilities, + u8 nr_fixed_counters, u32 supported_bitmask) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; @@ -545,40 +582,67 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities, static void test_intel_counters(void) { - uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH); - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); - uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); - uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS); + u8 pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); unsigned int i; - uint8_t v, j; - uint32_t k; + u8 v, j; + u32 k; - const uint64_t perf_caps[] = { + const u64 perf_caps[] = { 0, PMU_CAP_FW_WRITES, }; /* + * To keep the total runtime reasonable, test only a handful of select, + * semi-arbitrary values for the mask of unavailable PMU events. Test + * 0 (all events available) and all ones (no events available) as well + * as alternating bit sequencues, e.g. to detect if KVM is checking the + * wrong bit(s). + */ + const u32 unavailable_masks[] = { + 0x0, + 0xffffffffu, + 0xaaaaaaaau, + 0x55555555u, + 0xf0f0f0f0u, + 0x0f0f0f0fu, + 0xa0a0a0a0u, + 0x0a0a0a0au, + 0x50505050u, + 0x05050505u, + }; + + /* * Test up to PMU v5, which is the current maximum version defined by * Intel, i.e. is the last version that is guaranteed to be backwards * compatible with KVM's existing behavior. */ - uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); + u8 max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5); /* * Detect the existence of events that aren't supported by selftests. - * This will (obviously) fail any time the kernel adds support for a - * new event, but it's worth paying that price to keep the test fresh. + * This will (obviously) fail any time hardware adds support for a new + * event, but it's worth paying that price to keep the test fresh. */ - TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS, + TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS, "New architectural event(s) detected; please update this test (length = %u, mask = %x)", - nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); + this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH), + this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK)); /* - * Force iterating over known arch events regardless of whether or not - * KVM/hardware supports a given event. + * Iterate over known arch events irrespective of KVM/hardware support + * to verify that KVM doesn't reject programming of events just because + * the *architectural* encoding is unsupported. Track which events are + * supported in hardware; the guest side will validate supported events + * count correctly, even if *enumeration* of the event is unsupported + * by KVM and/or isn't exposed to the guest. */ - nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS); + for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) { + if (this_pmu_has(intel_event_to_feature(i).gp_event)) + hardware_pmu_arch_events |= BIT(i); + } for (v = 0; v <= max_pmu_version; v++) { for (i = 0; i < ARRAY_SIZE(perf_caps); i++) { @@ -587,16 +651,7 @@ static void test_intel_counters(void) pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n", v, perf_caps[i]); - /* - * To keep the total runtime reasonable, test every - * possible non-zero, non-reserved bitmap combination - * only with the native PMU version and the full bit - * vector length. - */ - if (v == pmu_version) { - for (k = 1; k < (BIT(nr_arch_events) - 1); k++) - test_arch_events(v, perf_caps[i], nr_arch_events, k); - } + /* * Test single bits for all PMU version and lengths up * the number of events +1 (to verify KVM doesn't do @@ -604,12 +659,9 @@ static void test_intel_counters(void) * host length). Explicitly test a mask of '0' and all * ones i.e. all events being available and unavailable. */ - for (j = 0; j <= nr_arch_events + 1; j++) { - test_arch_events(v, perf_caps[i], j, 0); - test_arch_events(v, perf_caps[i], j, 0xff); - - for (k = 0; k < nr_arch_events; k++) - test_arch_events(v, perf_caps[i], j, BIT(k)); + for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) { + for (k = 1; k < ARRAY_SIZE(unavailable_masks); k++) + test_arch_events(v, perf_caps[i], j, unavailable_masks[k]); } pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n", diff --git a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c index c15513cd74d1..c1232344fda8 100644 --- a/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c @@ -53,11 +53,11 @@ static const struct __kvm_pmu_event_filter base_event_filter = { }; struct { - uint64_t loads; - uint64_t stores; - uint64_t loads_stores; - uint64_t branches_retired; - uint64_t instructions_retired; + u64 loads; + u64 stores; + u64 loads_stores; + u64 branches_retired; + u64 instructions_retired; } pmc_results; /* @@ -75,9 +75,9 @@ static void guest_gp_handler(struct ex_regs *regs) * * Return on success. GUEST_SYNC(0) on error. */ -static void check_msr(uint32_t msr, uint64_t bits_to_flip) +static void check_msr(u32 msr, u64 bits_to_flip) { - uint64_t v = rdmsr(msr) ^ bits_to_flip; + u64 v = rdmsr(msr) ^ bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) @@ -89,10 +89,10 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip) GUEST_SYNC(-EIO); } -static void run_and_measure_loop(uint32_t msr_base) +static void run_and_measure_loop(u32 msr_base) { - const uint64_t branches_retired = rdmsr(msr_base + 0); - const uint64_t insn_retired = rdmsr(msr_base + 1); + const u64 branches_retired = rdmsr(msr_base + 0); + const u64 insn_retired = rdmsr(msr_base + 1); __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); @@ -147,7 +147,7 @@ static void amd_guest_code(void) * Run the VM to the next GUEST_SYNC(value), and return the value passed * to the sync. Any other exit from the guest is fatal. */ -static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) +static u64 run_vcpu_to_sync(struct kvm_vcpu *vcpu) { struct ucall uc; @@ -161,7 +161,7 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) { - uint64_t r; + u64 r; memset(&pmc_results, 0, sizeof(pmc_results)); sync_global_to_guest(vcpu->vm, pmc_results); @@ -182,7 +182,7 @@ static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) */ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) { - uint64_t r; + u64 r; vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); r = run_vcpu_to_sync(vcpu); @@ -195,7 +195,7 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ -static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) +static void remove_event(struct __kvm_pmu_event_filter *f, u64 event) { bool found = false; int i; @@ -212,10 +212,12 @@ static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) #define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ + u64 br = pmc_results.branches_retired; \ + u64 ir = pmc_results.instructions_retired; \ + bool br_matched = this_pmu_has_errata(BRANCHES_RETIRED_OVERCOUNT) ? \ + br >= NUM_BRANCHES : br == NUM_BRANCHES; \ \ - if (br && br != NUM_BRANCHES) \ + if (br && !br_matched) \ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ __func__, br, NUM_BRANCHES); \ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ @@ -226,8 +228,8 @@ do { \ #define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ do { \ - uint64_t br = pmc_results.branches_retired; \ - uint64_t ir = pmc_results.instructions_retired; \ + u64 br = pmc_results.branches_retired; \ + u64 ir = pmc_results.instructions_retired; \ \ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ __func__, br); \ @@ -359,7 +361,8 @@ static bool use_intel_pmu(void) */ static bool use_amd_pmu(void) { - return host_cpu_is_amd && kvm_cpu_family() >= 0x17; + return (host_cpu_is_amd && kvm_cpu_family() >= 0x17) || + host_cpu_is_hygon; } /* @@ -375,7 +378,7 @@ static bool use_amd_pmu(void) static bool supports_event_mem_inst_retired(void) { - uint32_t eax, ebx, ecx, edx; + u32 eax, ebx, ecx, edx; cpuid(1, &eax, &ebx, &ecx, &edx); if (x86_family(eax) == 0x6) { @@ -412,15 +415,15 @@ static bool supports_event_mem_inst_retired(void) #define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true) -static void masked_events_guest_test(uint32_t msr_base) +static void masked_events_guest_test(u32 msr_base) { /* * The actual value of the counters don't determine the outcome of * the test. Only that they are zero or non-zero. */ - const uint64_t loads = rdmsr(msr_base + 0); - const uint64_t stores = rdmsr(msr_base + 1); - const uint64_t loads_stores = rdmsr(msr_base + 2); + const u64 loads = rdmsr(msr_base + 0); + const u64 stores = rdmsr(msr_base + 1); + const u64 loads_stores = rdmsr(msr_base + 2); int val; @@ -473,7 +476,7 @@ static void amd_masked_events_guest_code(void) } static void run_masked_events_test(struct kvm_vcpu *vcpu, - const uint64_t masked_events[], + const u64 masked_events[], const int nmasked_events) { struct __kvm_pmu_event_filter f = { @@ -482,7 +485,7 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, }; - memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + memcpy(f.events, masked_events, sizeof(u64) * nmasked_events); test_with_filter(vcpu, &f); } @@ -491,12 +494,12 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, #define ALLOW_LOADS_STORES BIT(2) struct masked_events_test { - uint64_t intel_events[MAX_TEST_EVENTS]; - uint64_t intel_event_end; - uint64_t amd_events[MAX_TEST_EVENTS]; - uint64_t amd_event_end; + u64 intel_events[MAX_TEST_EVENTS]; + u64 intel_event_end; + u64 amd_events[MAX_TEST_EVENTS]; + u64 amd_event_end; const char *msg; - uint32_t flags; + u32 flags; }; /* @@ -579,9 +582,9 @@ const struct masked_events_test test_cases[] = { }; static int append_test_events(const struct masked_events_test *test, - uint64_t *events, int nevents) + u64 *events, int nevents) { - const uint64_t *evts; + const u64 *evts; int i; evts = use_intel_pmu() ? test->intel_events : test->amd_events; @@ -600,7 +603,7 @@ static bool bool_eq(bool a, bool b) return a == b; } -static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, +static void run_masked_events_tests(struct kvm_vcpu *vcpu, u64 *events, int nevents) { int ntests = ARRAY_SIZE(test_cases); @@ -627,7 +630,7 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, } } -static void add_dummy_events(uint64_t *events, int nevents) +static void add_dummy_events(u64 *events, int nevents) { int i; @@ -647,7 +650,7 @@ static void add_dummy_events(uint64_t *events, int nevents) static void test_masked_events(struct kvm_vcpu *vcpu) { int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS; - uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; + u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS]; /* Run the test cases against a sparse PMU event filter. */ run_masked_events_tests(vcpu, events, 0); @@ -665,8 +668,8 @@ static int set_pmu_event_filter(struct kvm_vcpu *vcpu, return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); } -static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, - uint32_t flags, uint32_t action) +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, u64 event, + u32 flags, u32 action) { struct __kvm_pmu_event_filter f = { .nevents = 1, @@ -682,9 +685,9 @@ static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, static void test_filter_ioctl(struct kvm_vcpu *vcpu) { - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); struct __kvm_pmu_event_filter f; - uint64_t e = ~0ul; + u64 e = ~0ul; int r; /* @@ -726,7 +729,7 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); } -static void intel_run_fixed_counter_guest_code(uint8_t idx) +static void intel_run_fixed_counter_guest_code(u8 idx) { for (;;) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -742,8 +745,8 @@ static void intel_run_fixed_counter_guest_code(uint8_t idx) } } -static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, - uint32_t action, uint32_t bitmap) +static u64 test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + u32 action, u32 bitmap) { struct __kvm_pmu_event_filter f = { .action = action, @@ -754,9 +757,9 @@ static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, return run_vcpu_to_sync(vcpu); } -static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, - uint32_t action, - uint32_t bitmap) +static u64 test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + u32 action, + u32 bitmap) { struct __kvm_pmu_event_filter f = base_event_filter; @@ -767,12 +770,12 @@ static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, return run_vcpu_to_sync(vcpu); } -static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, - uint8_t nr_fixed_counters) +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, u8 idx, + u8 nr_fixed_counters) { unsigned int i; - uint32_t bitmap; - uint64_t count; + u32 bitmap; + u64 count; TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, "Invalid nr_fixed_counters"); @@ -812,10 +815,10 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, static void test_fixed_counter_bitmap(void) { - uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + u8 nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint8_t idx; + u8 idx; /* * Check that pmu_event_filter works as expected when it's applied to diff --git a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c index 82a8d88b5338..1d2f5d4fd45d 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c @@ -23,13 +23,13 @@ #include <processor.h> #define BASE_DATA_SLOT 10 -#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) -#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) +#define BASE_DATA_GPA ((u64)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((u64)(SZ_2M + PAGE_SIZE)) /* Horrific macro so that the line info is captured accurately :-( */ #define memcmp_g(gpa, pattern, size) \ do { \ - uint8_t *mem = (uint8_t *)gpa; \ + u8 *mem = (u8 *)gpa; \ size_t i; \ \ for (i = 0; i < size; i++) \ @@ -38,7 +38,7 @@ do { \ pattern, i, gpa + i, mem[i]); \ } while (0) -static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +static void memcmp_h(u8 *mem, gpa_t gpa, u8 pattern, size_t size) { size_t i; @@ -70,13 +70,13 @@ enum ucall_syncs { SYNC_PRIVATE, }; -static void guest_sync_shared(uint64_t gpa, uint64_t size, - uint8_t current_pattern, uint8_t new_pattern) +static void guest_sync_shared(gpa_t gpa, u64 size, + u8 current_pattern, u8 new_pattern) { GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); } -static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +static void guest_sync_private(gpa_t gpa, u64 size, u8 pattern) { GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); } @@ -86,10 +86,10 @@ static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) #define MAP_GPA_SHARED BIT(1) #define MAP_GPA_DO_FALLOCATE BIT(2) -static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, +static void guest_map_mem(gpa_t gpa, u64 size, bool map_shared, bool do_fallocate) { - uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + u64 flags = MAP_GPA_SET_ATTRIBUTES; if (map_shared) flags |= MAP_GPA_SHARED; @@ -98,19 +98,19 @@ static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, kvm_hypercall_map_gpa_range(gpa, size, flags); } -static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +static void guest_map_shared(gpa_t gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, true, do_fallocate); } -static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +static void guest_map_private(gpa_t gpa, u64 size, bool do_fallocate) { guest_map_mem(gpa, size, false, do_fallocate); } struct { - uint64_t offset; - uint64_t size; + u64 offset; + u64 size; } static const test_ranges[] = { GUEST_STAGE(0, PAGE_SIZE), GUEST_STAGE(0, SZ_2M), @@ -119,11 +119,11 @@ struct { GUEST_STAGE(SZ_2M, PAGE_SIZE), }; -static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +static void guest_test_explicit_conversion(u64 base_gpa, bool do_fallocate) { - const uint8_t def_p = 0xaa; - const uint8_t init_p = 0xcc; - uint64_t j; + const u8 def_p = 0xaa; + const u8 init_p = 0xcc; + u64 j; int i; /* Memory should be shared by default. */ @@ -134,12 +134,12 @@ static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; - uint8_t p1 = 0x11; - uint8_t p2 = 0x22; - uint8_t p3 = 0x33; - uint8_t p4 = 0x44; + gpa_t gpa = base_gpa + test_ranges[i].offset; + u64 size = test_ranges[i].size; + u8 p1 = 0x11; + u8 p2 = 0x22; + u8 p3 = 0x33; + u8 p4 = 0x44; /* * Set the test region to pattern one to differentiate it from @@ -214,10 +214,10 @@ skip: } } -static void guest_punch_hole(uint64_t gpa, uint64_t size) +static void guest_punch_hole(gpa_t gpa, u64 size) { /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ - uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + u64 flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; kvm_hypercall_map_gpa_range(gpa, size, flags); } @@ -227,9 +227,9 @@ static void guest_punch_hole(uint64_t gpa, uint64_t size) * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating * (subsequent fault) should zero memory. */ -static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +static void guest_test_punch_hole(u64 base_gpa, bool precise) { - const uint8_t init_p = 0xcc; + const u8 init_p = 0xcc; int i; /* @@ -239,8 +239,8 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise) guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { - uint64_t gpa = base_gpa + test_ranges[i].offset; - uint64_t size = test_ranges[i].size; + gpa_t gpa = base_gpa + test_ranges[i].offset; + u64 size = test_ranges[i].size; /* * Free all memory before each iteration, even for the !precise @@ -268,7 +268,7 @@ static void guest_test_punch_hole(uint64_t base_gpa, bool precise) } } -static void guest_code(uint64_t base_gpa) +static void guest_code(u64 base_gpa) { /* * Run the conversion test twice, with and without doing fallocate() on @@ -289,8 +289,8 @@ static void guest_code(uint64_t base_gpa) static void handle_exit_hypercall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; - uint64_t gpa = run->hypercall.args[0]; - uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + gpa_t gpa = run->hypercall.args[0]; + u64 size = run->hypercall.args[1] * PAGE_SIZE; bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; @@ -337,7 +337,7 @@ static void *__test_mem_conversions(void *__vcpu) case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); case UCALL_SYNC: { - uint64_t gpa = uc.args[1]; + gpa_t gpa = uc.args[1]; size_t size = uc.args[2]; size_t i; @@ -347,7 +347,7 @@ static void *__test_mem_conversions(void *__vcpu) for (i = 0; i < size; i += vm->page_size) { size_t nr_bytes = min_t(size_t, vm->page_size, size - i); - uint8_t *hva = addr_gpa2hva(vm, gpa + i); + u8 *hva = addr_gpa2hva(vm, gpa + i); /* In all cases, the host should observe the shared data. */ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); @@ -366,8 +366,8 @@ static void *__test_mem_conversions(void *__vcpu) } } -static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, - uint32_t nr_memslots) +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, u32 nr_vcpus, + u32 nr_memslots) { /* * Allocate enough memory so that each vCPU's chunk of memory can be @@ -380,7 +380,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; pthread_t threads[KVM_MAX_VCPUS]; struct kvm_vm *vm; - int memfd, i, r; + int memfd, i; const struct vm_shape shape = { .mode = VM_MODE_DEFAULT, @@ -402,7 +402,7 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); for (i = 0; i < nr_vcpus; i++) { - uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + gpa_t gpa = BASE_DATA_GPA + i * per_cpu_size; vcpu_args_set(vcpus[i], 1, gpa); @@ -428,11 +428,8 @@ static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t * should prevent the VM from being fully destroyed until the last * reference to the guest_memfd is also put. */ - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); - - r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); - TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + kvm_fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); close(memfd); } @@ -453,8 +450,8 @@ static void usage(const char *cmd) int main(int argc, char *argv[]) { enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; - uint32_t nr_memslots = 1; - uint32_t nr_vcpus = 1; + u32 nr_memslots = 1; + u32 nr_vcpus = 1; int opt; TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); diff --git a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c index 13e72fcec8dd..10db9fe6d906 100644 --- a/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c +++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c @@ -17,17 +17,17 @@ #define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) #define EXITS_TEST_SLOT 10 -static uint64_t guest_repeatedly_read(void) +static u64 guest_repeatedly_read(void) { - volatile uint64_t value; + volatile u64 value; while (true) - value = *((uint64_t *) EXITS_TEST_GVA); + value = *((u64 *)EXITS_TEST_GVA); return value; } -static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +static u32 run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) { int r; @@ -50,7 +50,7 @@ static void test_private_access_memslot_deleted(void) struct kvm_vcpu *vcpu; pthread_t vm_thread; void *thread_return; - uint32_t exit_reason; + u32 exit_reason; vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, guest_repeatedly_read); @@ -72,7 +72,7 @@ static void test_private_access_memslot_deleted(void) vm_mem_region_delete(vm, EXITS_TEST_SLOT); pthread_join(vm_thread, &thread_return); - exit_reason = (uint32_t)(uint64_t)thread_return; + exit_reason = (u32)(u64)thread_return; TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); @@ -86,7 +86,7 @@ static void test_private_access_memslot_not_private(void) { struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint32_t exit_reason; + u32 exit_reason; vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, guest_repeatedly_read); diff --git a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c index 49913784bc82..8e3898646c69 100644 --- a/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c @@ -86,11 +86,11 @@ static void run_vcpu(struct kvm_vcpu *vcpu) } } -static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, +static struct kvm_vm *create_vm(u32 nr_vcpus, u32 bsp_vcpu_id, struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; - uint32_t i; + u32 i; vm = vm_create(nr_vcpus); @@ -104,7 +104,7 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, return vm; } -static void run_vm_bsp(uint32_t bsp_vcpu_id) +static void run_vm_bsp(u32 bsp_vcpu_id) { struct kvm_vcpu *vcpus[2]; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c index f4095a3d1278..8e654cc9ab16 100644 --- a/tools/testing/selftests/kvm/x86/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c @@ -46,9 +46,9 @@ do { \ X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \ X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT) -static uint64_t calc_supported_cr4_feature_bits(void) +static u64 calc_supported_cr4_feature_bits(void) { - uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4; + u64 cr4 = KVM_ALWAYS_ALLOWED_CR4; if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; @@ -74,7 +74,7 @@ static uint64_t calc_supported_cr4_feature_bits(void) return cr4; } -static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4) +static void test_cr_bits(struct kvm_vcpu *vcpu, u64 cr4) { struct kvm_sregs sregs; int rc, i; diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c index 3fb967f40c6a..8eeba2327c7c 100644 --- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c @@ -28,12 +28,13 @@ int kvm_fd; u64 supported_vmsa_features; bool have_sev_es; +bool have_snp; static int __sev_ioctl(int vm_fd, int cmd_id, void *data) { struct kvm_sev_cmd cmd = { .id = cmd_id, - .data = (uint64_t)data, + .data = (u64)data, .sev_fd = open_sev_dev_path_or_exit(), }; int ret; @@ -83,6 +84,9 @@ void test_vm_types(void) if (have_sev_es) test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + if (have_snp) + test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){}); + test_init2_invalid(0, &(struct kvm_sev_init){}, "VM type is KVM_X86_DEFAULT_VM"); if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) @@ -90,7 +94,7 @@ void test_vm_types(void) "VM type is KVM_X86_SW_PROTECTED_VM"); } -void test_flags(uint32_t vm_type) +void test_flags(u32 vm_type) { int i; @@ -100,7 +104,7 @@ void test_flags(uint32_t vm_type) "invalid flag"); } -void test_features(uint32_t vm_type, uint64_t supported_features) +void test_features(u32 vm_type, u64 supported_features) { int i; @@ -138,15 +142,24 @@ int main(int argc, char *argv[]) "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP); + TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)), + "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not", + kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM); + test_vm_types(); test_flags(KVM_X86_SEV_VM); if (have_sev_es) test_flags(KVM_X86_SEV_ES_VM); + if (have_snp) + test_flags(KVM_X86_SNP_VM); test_features(KVM_X86_SEV_VM, 0); if (have_sev_es) test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + if (have_snp) + test_features(KVM_X86_SNP_VM, supported_vmsa_features); return 0; } diff --git a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c index 0a6dfba3905b..6b0928e69051 100644 --- a/tools/testing/selftests/kvm/x86/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c @@ -36,8 +36,6 @@ static struct kvm_vm *sev_vm_create(bool es) sev_vm_launch(vm, es ? SEV_POLICY_ES : 0); - if (es) - vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); return vm; } diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index d97816dc476a..1a49ee391586 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -13,21 +13,61 @@ #include "linux/psp-sev.h" #include "sev.h" +static void guest_sev_test_msr(u32 msr) +{ + u64 val = rdmsr(msr); + + wrmsr(msr, val); + GUEST_ASSERT(val == rdmsr(msr)); +} + +#define guest_sev_test_reg(reg) \ +do { \ + u64 val = get_##reg(); \ + \ + set_##reg(val); \ + GUEST_ASSERT(val == get_##reg()); \ +} while (0) + +static void guest_sev_test_regs(void) +{ + guest_sev_test_msr(MSR_EFER); + guest_sev_test_reg(cr0); + guest_sev_test_reg(cr3); + guest_sev_test_reg(cr4); + guest_sev_test_reg(cr8); +} #define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) +static void guest_snp_code(void) +{ + u64 sev_msr = rdmsr(MSR_AMD64_SEV); + + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED); + GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED); + + guest_sev_test_regs(); + + wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); + vmgexit(); +} + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED); + guest_sev_test_regs(); + /* * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply * force "termination" to signal "done" via the GHCB MSR protocol. */ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ); - __asm__ __volatile__("rep; vmmcall"); + vmgexit(); } static void guest_sev_code(void) @@ -35,6 +75,8 @@ static void guest_sev_code(void) GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV)); GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED); + guest_sev_test_regs(); + GUEST_DONE(); } @@ -62,19 +104,19 @@ static void compare_xsave(u8 *from_host, u8 *from_guest) abort(); } -static void test_sync_vmsa(uint32_t policy) +static void test_sync_vmsa(u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t gva; + gva_t gva; void *hva; double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); - gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, - MEM_REGION_TEST_DATA); + vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu); + gva = vm_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); hva = addr_gva2hva(vm, gva); vcpu_args_set(vcpu, 1, gva); @@ -89,10 +131,10 @@ static void test_sync_vmsa(uint32_t policy) : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); vcpu_xsave_set(vcpu, &xsave); - vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + vm_sev_launch(vm, policy, NULL); /* This page is shared, so make it decrypted. */ - memset(hva, 0, 4096); + memset(hva, 0, PAGE_SIZE); vcpu_run(vcpu); @@ -108,14 +150,12 @@ static void test_sync_vmsa(uint32_t policy) kvm_vm_free(vm); } -static void test_sev(void *guest_code, uint64_t policy) +static void test_sev(void *guest_code, u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); /* TODO: Validate the measurement is as expected. */ @@ -124,7 +164,7 @@ static void test_sev(void *guest_code, uint64_t policy) for (;;) { vcpu_run(vcpu); - if (policy & SEV_POLICY_ES) { + if (is_sev_es_vm(vm)) { TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, "Wanted SYSTEM_EVENT, got %s", exit_reason_str(vcpu->run->exit_reason)); @@ -161,16 +201,14 @@ static void guest_shutdown_code(void) __asm__ __volatile__("ud2"); } -static void test_sev_es_shutdown(void) +static void test_sev_shutdown(u32 type, u64 policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint32_t type = KVM_X86_SEV_ES_VM; - vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); - vm_sev_launch(vm, SEV_POLICY_ES, NULL); + vm_sev_launch(vm, policy, NULL); vcpu_run(vcpu); TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, @@ -180,27 +218,42 @@ static void test_sev_es_shutdown(void) kvm_vm_free(vm); } -int main(int argc, char *argv[]) +static void test_sev_smoke(void *guest, u32 type, u64 policy) { const u64 xf_mask = XFEATURE_MASK_X87_AVX; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - - test_sev(guest_sev_code, SEV_POLICY_NO_DBG); - test_sev(guest_sev_code, 0); + if (type == KVM_X86_SNP_VM) + test_sev(guest, type, policy | SNP_POLICY_DBG); + else + test_sev(guest, type, policy | SEV_POLICY_NO_DBG); + test_sev(guest, type, policy); - if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { - test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); - test_sev(guest_sev_es_code, SEV_POLICY_ES); + if (type == KVM_X86_SEV_VM) + return; - test_sev_es_shutdown(); + test_sev_shutdown(type, policy); - if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { - test_sync_vmsa(0); - test_sync_vmsa(SEV_POLICY_NO_DBG); - } + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { + test_sync_vmsa(type, policy); + if (type == KVM_X86_SNP_VM) + test_sync_vmsa(type, policy | SNP_POLICY_DBG); + else + test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG); } +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); + + test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0); + + if (kvm_cpu_has(X86_FEATURE_SEV_ES)) + test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES); + + if (kvm_cpu_has(X86_FEATURE_SEV_SNP)) + test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy()); return 0; } diff --git a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c index fabeeaddfb3a..3dca85e95478 100644 --- a/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c @@ -20,8 +20,8 @@ static void guest_code(bool tdp_enabled) { - uint64_t error_code; - uint64_t vector; + u64 error_code; + u64 vector; vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA)); @@ -47,9 +47,8 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - uint64_t *pte; - uint64_t *hva; - uint64_t gpa; + u64 *hva; + gpa_t gpa; int rc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); @@ -73,8 +72,7 @@ int main(int argc, char *argv[]) hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, MEM_REGION_GVA); - *pte |= BIT_ULL(MAXPHYADDR); + *vm_get_pte(vm, MEM_REGION_GVA) |= BIT_ULL(MAXPHYADDR); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c index 55c88d664a94..740051167dbd 100644 --- a/tools/testing/selftests/kvm/x86/smm_test.c +++ b/tools/testing/selftests/kvm/x86/smm_test.c @@ -14,13 +14,11 @@ #include "test_util.h" #include "kvm_util.h" +#include "smm.h" #include "vmx.h" #include "svm_util.h" -#define SMRAM_SIZE 65536 -#define SMRAM_MEMSLOT ((1 << 16) | 1) -#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) #define SMRAM_GPA 0x1000000 #define SMRAM_STAGE 0xfe @@ -36,13 +34,13 @@ * independent subset of asm here. * SMI handler always report back fixed stage SMRAM_STAGE. */ -uint8_t smi_handler[] = { +u8 smi_handler[] = { 0xb0, SMRAM_STAGE, /* mov $SMRAM_STAGE, %al */ 0xe4, SYNC_PORT, /* in $SYNC_PORT, %al */ 0x0f, 0xaa, /* rsm */ }; -static inline void sync_with_host(uint64_t phase) +static inline void sync_with_host(u64 phase) { asm volatile("in $" XSTR(SYNC_PORT)", %%al \n" : "+a" (phase)); @@ -67,7 +65,7 @@ static void guest_code(void *arg) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + u64 apicbase = rdmsr(MSR_IA32_APICBASE); struct svm_test_data *svm = arg; struct vmx_pages *vmx_pages = arg; @@ -113,21 +111,9 @@ static void guest_code(void *arg) sync_with_host(DONE); } -void inject_smi(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events; - - vcpu_events_get(vcpu, &events); - - events.smi.pending = 1; - events.flags |= KVM_VCPUEVENT_VALID_SMM; - - vcpu_events_set(vcpu, &events); -} - int main(int argc, char *argv[]) { - vm_vaddr_t nested_gva = 0; + gva_t nested_gva = 0; struct kvm_vcpu *vcpu; struct kvm_regs regs; @@ -140,16 +126,7 @@ int main(int argc, char *argv[]) /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, - SMRAM_MEMSLOT, SMRAM_PAGES, 0); - TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT) - == SMRAM_GPA, "could not allocate guest physical addresses?"); - - memset(addr_gpa2hva(vm, SMRAM_GPA), 0x0, SMRAM_SIZE); - memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, - sizeof(smi_handler)); - - vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); + setup_smram(vm, vcpu, SMRAM_GPA, smi_handler, sizeof(smi_handler)); if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { if (kvm_cpu_has(X86_FEATURE_SVM)) diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 141b7fc0c965..409c6cc9f921 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -26,7 +26,9 @@ void svm_l2_guest_code(void) GUEST_SYNC(4); /* Exit to L1 */ vmcall(); + clgi(); GUEST_SYNC(6); + stgi(); /* Done, exit to L1 and never come back. */ vmcall(); } @@ -41,6 +43,8 @@ static void svm_l1_guest_code(struct svm_test_data *svm) generic_svm_setup(svm, svm_l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK); + GUEST_SYNC(3); run_guest(vmcb, svm->vmcb_gpa); GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); @@ -140,8 +144,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg) GUEST_SYNC(1); if (this_cpu_has(X86_FEATURE_XSAVE)) { - uint64_t supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[4096]; + u64 supported_xcr0 = this_cpu_supported_xcr0(); + u8 buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); @@ -168,8 +172,8 @@ static void __attribute__((__flatten__)) guest_code(void *arg) } if (this_cpu_has(X86_FEATURE_MPX)) { - uint64_t bounds[2] = { 10, 0xffffffffull }; - uint64_t output[2] = { }; + u64 bounds[2] = { 10, 0xffffffffull }; + u64 output[2] = { }; GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); @@ -222,10 +226,39 @@ static void __attribute__((__flatten__)) guest_code(void *arg) GUEST_DONE(); } +void svm_check_nested_state(int stage, struct kvm_x86_state *state) +{ + struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm; + + if (kvm_cpu_has(X86_FEATURE_VGIF)) { + if (stage == 4) + TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1); + if (stage == 6) + TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0); + } + + if (kvm_cpu_has(X86_FEATURE_NRIPS)) { + /* + * GUEST_SYNC() causes IO emulation in KVM, in which case the + * RIP is advanced before exiting to userspace. Hence, the RIP + * in the saved state should be the same as nRIP saved by the + * CPU in the VMCB. + */ + if (stage == 6) + TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip); + } +} + +void check_nested_state(int stage, struct kvm_x86_state *state) +{ + if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM)) + svm_check_nested_state(stage, state); +} + int main(int argc, char *argv[]) { - uint64_t *xstate_bv, saved_xstate_bv; - vm_vaddr_t nested_gva = 0; + u64 *xstate_bv, saved_xstate_bv; + gva_t nested_gva = 0; struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; struct kvm_vcpu *vcpu, *vcpuN; @@ -278,6 +311,8 @@ int main(int argc, char *argv[]) kvm_vm_release(vm); + check_nested_state(stage, state); + /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); @@ -296,7 +331,7 @@ int main(int argc, char *argv[]) * supported features, even if something goes awry in saving * the original snapshot. */ - xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + xstate_bv = (void *)&((u8 *)state->xsave->region)[512]; saved_xstate_bv = *xstate_bv; vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c index 916e04248fbb..d3cc5e4f7883 100644 --- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c @@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm) x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); GUEST_ASSERT(vintr_irq_called); GUEST_ASSERT(intr_irq_called); @@ -85,7 +82,7 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; struct ucall uc; diff --git a/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c new file mode 100644 index 000000000000..7fbfaa054c95 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_lbr_nested_state.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2026, Google, Inc. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + + +#define L2_GUEST_STACK_SIZE 64 + +#define DO_BRANCH() do { asm volatile("jmp 1f\n 1: nop"); } while (0) + +struct lbr_branch { + u64 from, to; +}; + +volatile struct lbr_branch l2_branch; + +#define RECORD_AND_CHECK_BRANCH(b) \ +do { \ + wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR); \ + DO_BRANCH(); \ + (b)->from = rdmsr(MSR_IA32_LASTBRANCHFROMIP); \ + (b)->to = rdmsr(MSR_IA32_LASTBRANCHTOIP); \ + /* Disable LBR right after to avoid overriding the IPs */ \ + wrmsr(MSR_IA32_DEBUGCTLMSR, 0); \ + \ + GUEST_ASSERT_NE((b)->from, 0); \ + GUEST_ASSERT_NE((b)->to, 0); \ +} while (0) + +#define CHECK_BRANCH_MSRS(b) \ +do { \ + GUEST_ASSERT_EQ((b)->from, rdmsr(MSR_IA32_LASTBRANCHFROMIP)); \ + GUEST_ASSERT_EQ((b)->to, rdmsr(MSR_IA32_LASTBRANCHTOIP)); \ +} while (0) + +#define CHECK_BRANCH_VMCB(b, vmcb) \ +do { \ + GUEST_ASSERT_EQ((b)->from, vmcb->save.br_from); \ + GUEST_ASSERT_EQ((b)->to, vmcb->save.br_to); \ +} while (0) + +static void l2_guest_code(struct svm_test_data *svm) +{ + /* Record a branch, trigger save/restore, and make sure LBRs are intact */ + RECORD_AND_CHECK_BRANCH(&l2_branch); + GUEST_SYNC(true); + CHECK_BRANCH_MSRS(&l2_branch); + vmmcall(); +} + +static void l1_guest_code(struct svm_test_data *svm, bool nested_lbrv) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + struct lbr_branch l1_branch; + + /* Record a branch, trigger save/restore, and make sure LBRs are intact */ + RECORD_AND_CHECK_BRANCH(&l1_branch); + GUEST_SYNC(true); + CHECK_BRANCH_MSRS(&l1_branch); + + /* Run L2, which will also do the same */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + if (nested_lbrv) + vmcb->control.misc_ctl2 = SVM_MISC2_ENABLE_V_LBR; + else + vmcb->control.misc_ctl2 &= ~SVM_MISC2_ENABLE_V_LBR; + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* Trigger save/restore one more time before checking, just for kicks */ + GUEST_SYNC(true); + + /* + * If LBR_CTL_ENABLE is set, L1 and L2 should have separate LBR MSRs, so + * expect L1's LBRs to remain intact and L2 LBRs to be in the VMCB. + * Otherwise, the MSRs are shared between L1 & L2 so expect L2's LBRs. + */ + if (nested_lbrv) { + CHECK_BRANCH_MSRS(&l1_branch); + CHECK_BRANCH_VMCB(&l2_branch, vmcb); + } else { + CHECK_BRANCH_MSRS(&l2_branch); + } + GUEST_DONE(); +} + +void test_lbrv_nested_state(bool nested_lbrv) +{ + struct kvm_x86_state *state = NULL; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + gva_t svm_gva; + + pr_info("Testing with nested LBRV %s\n", nested_lbrv ? "enabled" : "disabled"); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vcpu, 2, svm_gva, nested_lbrv); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + /* Save the vCPU state and restore it in a new VM on sync */ + pr_info("Guest triggered save/restore.\n"); + state = vcpu_save_state(vcpu); + kvm_vm_release(vm); + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +done: + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_is_lbrv_enabled()); + + test_lbrv_nested_state(/*nested_lbrv=*/false); + test_lbrv_nested_state(/*nested_lbrv=*/true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c new file mode 100644 index 000000000000..6a89eaffc657 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_nested_clear_efer_svme.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2026, Google LLC. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + unsigned long efer = rdmsr(MSR_EFER); + + /* generic_svm_setup() initializes EFER_SVME set for L2 */ + GUEST_ASSERT(efer & EFER_SVME); + wrmsr(MSR_EFER, efer & ~EFER_SVME); + + /* Unreachable, L1 should be shutdown */ + GUEST_ASSERT(0); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(svm->vmcb, svm->vmcb_gpa); + + /* Unreachable, L1 should be shutdown */ + GUEST_ASSERT(0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + gva_t nested_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vcpu_alloc_svm(vm, &nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c index 00135cbba35e..c6ea3d609a62 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c @@ -42,7 +42,7 @@ static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c index 7b6481d6c0d3..f72f11d4c4f8 100644 --- a/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c @@ -76,7 +76,7 @@ static void l2_guest_code_nmi(void) ud2(); } -static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt) +static void l1_guest_code(struct svm_test_data *svm, u64 is_nmi, u64 idt_alt) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, - "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", + "Expected VMMCAL #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, - "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", + "Expected HLT #VMEXIT, got '0x%lx', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -144,8 +144,8 @@ static void run_test(bool is_nmi) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t svm_gva; - vm_vaddr_t idt_alt_vm; + gva_t svm_gva; + gva_t idt_alt_vm; struct kvm_guest_debug debug; pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int"); @@ -161,14 +161,14 @@ static void run_test(bool is_nmi) if (!is_nmi) { void *idt, *idt_alt; - idt_alt_vm = vm_vaddr_alloc_page(vm); + idt_alt_vm = vm_alloc_page(vm); idt_alt = addr_gva2hva(vm, idt_alt_vm); idt = addr_gva2hva(vm, vm->arch.idt); memcpy(idt_alt, idt, getpagesize()); } else { idt_alt_vm = 0; } - vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm); + vcpu_args_set(vcpu, 3, svm_gva, (u64)is_nmi, (u64)idt_alt_vm); memset(&debug, 0, sizeof(debug)); vcpu_guest_debug_set(vcpu, &debug); diff --git a/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c new file mode 100644 index 000000000000..a4935ce2fb99 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/svm_nested_vmcb12_gpa.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2026, Google LLC. + */ +#include "kvm_util.h" +#include "vmx.h" +#include "svm_util.h" +#include "kselftest.h" +#include "kvm_test_harness.h" +#include "test_util.h" + + +#define L2_GUEST_STACK_SIZE 64 + +#define SYNC_GP 101 +#define SYNC_L2_STARTED 102 + +static unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(SYNC_GP); +} + +static void l2_code(void) +{ + GUEST_SYNC(SYNC_L2_STARTED); + vmcall(); +} + +static void l1_vmrun(struct svm_test_data *svm, gpa_t gpa) +{ + generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + asm volatile ("vmrun %[gpa]" : : [gpa] "a" (gpa) : "memory"); +} + +static void l1_vmload(struct svm_test_data *svm, gpa_t gpa) +{ + generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + asm volatile ("vmload %[gpa]" : : [gpa] "a" (gpa) : "memory"); +} + +static void l1_vmsave(struct svm_test_data *svm, gpa_t gpa) +{ + generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + asm volatile ("vmsave %[gpa]" : : [gpa] "a" (gpa) : "memory"); +} + +static void l1_vmexit(struct svm_test_data *svm, gpa_t gpa) +{ + generic_svm_setup(svm, l2_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +static u64 unmappable_gpa(struct kvm_vcpu *vcpu) +{ + struct userspace_mem_region *region; + u64 region_gpa_end, vm_gpa_end = 0; + int i; + + hash_for_each(vcpu->vm->regions.slot_hash, i, region, slot_node) { + region_gpa_end = region->region.guest_phys_addr + region->region.memory_size; + vm_gpa_end = max(vm_gpa_end, region_gpa_end); + } + + return vm_gpa_end; +} + +static void test_invalid_vmcb12(struct kvm_vcpu *vcpu) +{ + gva_t nested_gva = 0; + struct ucall uc; + + + vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); + vcpu_alloc_svm(vcpu->vm, &nested_gva); + vcpu_args_set(vcpu, 2, nested_gva, -1ULL); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], SYNC_GP); +} + +static void test_unmappable_vmcb12(struct kvm_vcpu *vcpu) +{ + gva_t nested_gva = 0; + + vcpu_alloc_svm(vcpu->vm, &nested_gva); + vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu)); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); + TEST_ASSERT_EQ(vcpu->run->emulation_failure.suberror, KVM_INTERNAL_ERROR_EMULATION); +} + +static void test_unmappable_vmcb12_vmexit(struct kvm_vcpu *vcpu) +{ + struct kvm_x86_state *state; + gva_t nested_gva = 0; + struct ucall uc; + + /* + * Enter L2 (with a legit vmcb12 GPA), then overwrite vmcb12 GPA with an + * unmappable GPA. KVM will fail to map vmcb12 on nested VM-Exit and + * cause a shutdown. + */ + vcpu_alloc_svm(vcpu->vm, &nested_gva); + vcpu_args_set(vcpu, 2, nested_gva, unmappable_gpa(vcpu)); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], SYNC_L2_STARTED); + + state = vcpu_save_state(vcpu); + state->nested.hdr.svm.vmcb_pa = unmappable_gpa(vcpu); + vcpu_load_state(vcpu, state); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + + kvm_x86_state_cleanup(state); +} + +KVM_ONE_VCPU_TEST_SUITE(vmcb12_gpa); + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_invalid, l1_vmrun) +{ + test_invalid_vmcb12(vcpu); +} + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_invalid, l1_vmload) +{ + test_invalid_vmcb12(vcpu); +} + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_invalid, l1_vmsave) +{ + test_invalid_vmcb12(vcpu); +} + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmrun_unmappable, l1_vmrun) +{ + test_unmappable_vmcb12(vcpu); +} + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmload_unmappable, l1_vmload) +{ + test_unmappable_vmcb12(vcpu); +} + +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmsave_unmappable, l1_vmsave) +{ + test_unmappable_vmcb12(vcpu); +} + +/* + * Invalid vmcb12_gpa cannot be test for #VMEXIT as KVM_SET_NESTED_STATE will + * reject it. + */ +KVM_ONE_VCPU_TEST(vmcb12_gpa, vmexit_unmappable, l1_vmexit) +{ + test_unmappable_vmcb12_vmexit(vcpu); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c index 8a62cca28cfb..b1887242f3b8 100644 --- a/tools/testing/selftests/kvm/x86/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c @@ -36,7 +36,7 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; - vm_vaddr_t svm_gva; + gva_t svm_gva; struct kvm_vm *vm; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); diff --git a/tools/testing/selftests/kvm/x86/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c index 8fa3948b0170..e0c52321f87c 100644 --- a/tools/testing/selftests/kvm/x86/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c @@ -20,7 +20,7 @@ #include "kvm_util.h" #include "processor.h" -#define UCALL_PIO_PORT ((uint16_t)0x1000) +#define UCALL_PIO_PORT ((u16)0x1000) struct ucall uc_none = { .cmd = UCALL_NONE, diff --git a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c index 56306a19144a..f1c488e0d497 100644 --- a/tools/testing/selftests/kvm/x86/triple_fault_event_test.c +++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c @@ -72,13 +72,13 @@ int main(void) if (has_vmx) { - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx); vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vcpu, 1, vmx_pages_gva); } else { - vm_vaddr_t svm_gva; + gva_t svm_gva; vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm); vcpu_alloc_svm(vm, &svm_gva); diff --git a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c index 12b0964f4f13..91583969a14f 100644 --- a/tools/testing/selftests/kvm/x86/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c @@ -95,7 +95,7 @@ int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - uint64_t val; + u64 val; ksft_print_header(); ksft_set_plan(5); diff --git a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c index 59c7304f805e..59da8d4da607 100644 --- a/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c @@ -21,10 +21,10 @@ pthread_spinlock_t create_lock; #define TEST_TSC_KHZ 2345678UL #define TEST_TSC_OFFSET 200000000 -uint64_t tsc_sync; +u64 tsc_sync; static void guest_code(void) { - uint64_t start_tsc, local_tsc, tmp; + u64 start_tsc, local_tsc, tmp; start_tsc = rdtsc(); do { diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c index 57f157c06b39..df1ec8209c76 100644 --- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c @@ -45,7 +45,7 @@ #define MCI_CTL2_RESERVED_BIT BIT_ULL(29) -static uint64_t supported_mcg_caps; +static u64 supported_mcg_caps; /* * Record states about the injected UCNA. @@ -53,30 +53,30 @@ static uint64_t supported_mcg_caps; * handler. Variables without the 'i_' prefixes are recorded in guest main * execution thread. */ -static volatile uint64_t i_ucna_rcvd; -static volatile uint64_t i_ucna_addr; -static volatile uint64_t ucna_addr; -static volatile uint64_t ucna_addr2; +static volatile u64 i_ucna_rcvd; +static volatile u64 i_ucna_addr; +static volatile u64 ucna_addr; +static volatile u64 ucna_addr2; struct thread_params { struct kvm_vcpu *vcpu; - uint64_t *p_i_ucna_rcvd; - uint64_t *p_i_ucna_addr; - uint64_t *p_ucna_addr; - uint64_t *p_ucna_addr2; + u64 *p_i_ucna_rcvd; + u64 *p_i_ucna_addr; + u64 *p_ucna_addr; + u64 *p_ucna_addr2; }; static void verify_apic_base_addr(void) { - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); + u64 msr = rdmsr(MSR_IA32_APICBASE); + u64 base = GET_APIC_BASE(msr); GUEST_ASSERT(base == APIC_DEFAULT_GPA); } static void ucna_injection_guest_code(void) { - uint64_t ctl2; + u64 ctl2; verify_apic_base_addr(); xapic_enable(); @@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void) wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); /* Enables interrupt in guest. */ - asm volatile("sti"); + sti(); /* Let user space inject the first UCNA */ GUEST_SYNC(SYNC_FIRST_UCNA); @@ -106,7 +106,7 @@ static void ucna_injection_guest_code(void) static void cmci_disabled_guest_code(void) { - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); GUEST_DONE(); @@ -114,7 +114,7 @@ static void cmci_disabled_guest_code(void) static void cmci_enabled_guest_code(void) { - uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + u64 ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT); GUEST_DONE(); @@ -145,14 +145,15 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) printf("vCPU received GP in guest.\n"); } -static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) { +static void inject_ucna(struct kvm_vcpu *vcpu, u64 addr) +{ /* * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in * the IA32_MCi_STATUS register. * MSCOD=1 (BIT[16] - MscodDataRdErr). * MCACOD=0x0090 (Memory controller error format, channel 0) */ - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + u64 status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090; struct kvm_x86_mce mce = {}; mce.status = status; @@ -216,10 +217,10 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par { struct kvm_vm *vm = vcpu->vm; params->vcpu = vcpu; - params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd); - params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr); - params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr); - params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2); + params->p_i_ucna_rcvd = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_rcvd); + params->p_i_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&i_ucna_addr); + params->p_ucna_addr = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr); + params->p_ucna_addr2 = (u64 *)addr_gva2hva(vm, (u64)&ucna_addr2); run_ucna_injection(params); @@ -242,7 +243,7 @@ static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *par static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) { - uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; + u64 mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; if (enable_cmci_p) mcg_caps |= MCG_CMCI_P; @@ -250,7 +251,7 @@ static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps); } -static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, +static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, u32 vcpuid, bool enable_cmci_p, void *guest_code) { struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code); diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f..9c5a87576c2e 100644 --- a/tools/testing/selftests/kvm/x86/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -10,7 +10,7 @@ #include "kvm_util.h" #include "processor.h" -static void guest_ins_port80(uint8_t *buffer, unsigned int count) +static void guest_ins_port80(u8 *buffer, unsigned int count) { unsigned long end; @@ -26,7 +26,7 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count) static void guest_code(void) { - uint8_t buffer[8192]; + u8 buffer[8192]; int i; /* @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); + memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE); vcpu_regs_set(vcpu, ®s); } diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..2808ce727e5f 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -23,21 +23,21 @@ struct kvm_msr_filter filter_allow = { .nmsrs = 1, /* Test an MSR the kernel knows about. */ .base = MSR_IA32_XSS, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, { .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, .nmsrs = 1, /* Test an MSR the kernel doesn't know about. */ .base = MSR_IA32_FLUSH_CMD, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, { .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE, .nmsrs = 1, /* Test a fabricated MSR that no one knows about. */ .base = MSR_NON_EXISTENT, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; @@ -49,7 +49,7 @@ struct kvm_msr_filter filter_fs = { .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_FS_BASE, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; @@ -61,12 +61,12 @@ struct kvm_msr_filter filter_gs = { .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = MSR_GS_BASE, - .bitmap = (uint8_t*)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; -static uint64_t msr_non_existent_data; +static u64 msr_non_existent_data; static int guest_exception_count; static u32 msr_reads, msr_writes; @@ -77,7 +77,7 @@ static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE]; static u8 bitmap_deadbeef[1] = { 0x1 }; -static void deny_msr(uint8_t *bitmap, u32 msr) +static void deny_msr(u8 *bitmap, u32 msr) { u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1); @@ -142,26 +142,26 @@ struct kvm_msr_filter no_filter_deny = { * Note: Force test_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline uint64_t test_rdmsr(uint32_t msr) +static noinline u64 test_rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; guest_exception_count = 0; __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } /* * Note: Force test_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_wrmsr(uint32_t msr, uint64_t value) +static noinline void test_wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; guest_exception_count = 0; @@ -176,26 +176,26 @@ extern char wrmsr_start, wrmsr_end; * Note: Force test_em_rdmsr() to not be inlined to prevent the labels, * rdmsr_start and rdmsr_end, from being defined multiple times. */ -static noinline uint64_t test_em_rdmsr(uint32_t msr) +static noinline u64 test_em_rdmsr(u32 msr) { - uint32_t a, d; + u32 a, d; guest_exception_count = 0; __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); - return a | ((uint64_t) d << 32); + return a | ((u64)d << 32); } /* * Note: Force test_em_wrmsr() to not be inlined to prevent the labels, * wrmsr_start and wrmsr_end, from being defined multiple times. */ -static noinline void test_em_wrmsr(uint32_t msr, uint64_t value) +static noinline void test_em_wrmsr(u32 msr, u64 value) { - uint32_t a = value; - uint32_t d = value >> 32; + u32 a = value; + u32 d = value >> 32; guest_exception_count = 0; @@ -208,7 +208,7 @@ extern char em_wrmsr_start, em_wrmsr_end; static void guest_code_filter_allow(void) { - uint64_t data; + u64 data; /* * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS. @@ -328,7 +328,7 @@ static void guest_code_filter_deny(void) static void guest_code_permission_bitmap(void) { - uint64_t data; + u64 data; data = test_rdmsr(MSR_FS_BASE); GUEST_ASSERT(data == MSR_FS_BASE); @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -385,7 +391,7 @@ static void check_for_guest_assert(struct kvm_vcpu *vcpu) } } -static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void process_rdmsr(struct kvm_vcpu *vcpu, u32 msr_index) { struct kvm_run *run = vcpu->run; @@ -417,7 +423,7 @@ static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) } } -static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void process_wrmsr(struct kvm_vcpu *vcpu, u32 msr_index) { struct kvm_run *run = vcpu->run; @@ -458,7 +464,7 @@ static void process_ucall_done(struct kvm_vcpu *vcpu) uc.cmd, UCALL_DONE); } -static uint64_t process_ucall(struct kvm_vcpu *vcpu) +static u64 process_ucall(struct kvm_vcpu *vcpu) { struct ucall uc = {}; @@ -483,20 +489,20 @@ static uint64_t process_ucall(struct kvm_vcpu *vcpu) } static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) + u32 msr_index) { vcpu_run(vcpu); process_rdmsr(vcpu, msr_index); } static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, - uint32_t msr_index) + u32 msr_index) { vcpu_run(vcpu); process_wrmsr(vcpu, msr_index); } -static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu) +static u64 run_guest_then_process_ucall(struct kvm_vcpu *vcpu) { vcpu_run(vcpu); return process_ucall(vcpu); @@ -513,7 +519,7 @@ KVM_ONE_VCPU_TEST_SUITE(user_msr); KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) { struct kvm_vm *vm = vcpu->vm; - uint64_t cmd; + u64 cmd; int rc; rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } @@ -724,7 +732,7 @@ static void run_msr_filter_flag_test(struct kvm_vm *vm) .flags = KVM_MSR_FILTER_READ, .nmsrs = 1, .base = 0, - .bitmap = (uint8_t *)&deny_bits, + .bitmap = (u8 *)&deny_bits, }, }, }; diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c index a81a24761aac..1720113eae79 100644 --- a/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c @@ -38,7 +38,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + u32 control; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); GUEST_ASSERT(load_vmcs(vmx_pages)); @@ -72,7 +72,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) int main(int argc, char *argv[]) { unsigned long apic_access_addr = ~0ul; - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; unsigned long high_gpa; struct vmx_pages *vmx; bool done = false; diff --git a/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c new file mode 100644 index 000000000000..80a4fd1e5bbb --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_apicv_updates_test.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define GOOD_IPI_VECTOR 0xe0 +#define BAD_IPI_VECTOR 0xf0 + +static volatile int good_ipis_received; + +static void good_ipi_handler(struct ex_regs *regs) +{ + good_ipis_received++; +} + +static void bad_ipi_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Received \"bad\" IPI; ICR MMIO write should have been ignored"); +} + +static void l2_guest_code(void) +{ + x2apic_enable(); + vmcall(); + + xapic_enable(); + xapic_write_reg(APIC_ID, 1 << 24); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u32 control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + + /* Modify APIC ID to coerce KVM into inhibiting APICv. */ + xapic_enable(); + xapic_write_reg(APIC_ID, 1 << 24); + + /* + * Generate+receive an IRQ without doing EOI to get an IRQ set in vISR + * but not SVI. APICv should be inhibited due to running with a + * modified APIC ID. + */ + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR); + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ID), 1 << 24); + + /* Enable IRQs and verify the IRQ was received. */ + sti_nop(); + GUEST_ASSERT_EQ(good_ipis_received, 1); + + /* + * Run L2 to switch to x2APIC mode, which in turn will uninhibit APICv, + * as KVM should force the APIC ID back to its default. + */ + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN)); + GUEST_ASSERT(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD); + + /* + * Scribble the APIC access page to verify KVM disabled xAPIC + * virtualization in vmcs01, and to verify that KVM flushes L1's TLB + * when L2 switches back to accelerated xAPIC mode. + */ + xapic_write_reg(APIC_ICR2, 0xdeadbeefu); + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | BAD_IPI_VECTOR); + + /* + * Verify the IRQ is still in-service and emit an EOI to verify KVM + * propagates the highest vISR vector to SVI when APICv is activated + * (and does so even if APICv was uninhibited while L2 was active). + */ + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), + BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR))); + x2apic_write_reg(APIC_EOI, 0); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0); + + /* + * Run L2 one more time to switch back to xAPIC mode to verify that KVM + * handles the x2APIC => xAPIC transition and inhibits APICv while L2 + * is active. + */ + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT(!(rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_EXTD)); + + xapic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_DM_FIXED | GOOD_IPI_VECTOR); + /* Re-enable IRQs, as VM-Exit clears RFLAGS.IF. */ + sti_nop(); + GUEST_ASSERT_EQ(good_ipis_received, 2); + + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), + BIT(APIC_VECTOR_TO_BIT_NUMBER(GOOD_IPI_VECTOR))); + xapic_write_reg(APIC_EOI, 0); + GUEST_ASSERT_EQ(xapic_read_reg(APIC_ISR + APIC_VECTOR_TO_REG_OFFSET(GOOD_IPI_VECTOR)), 0); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + gva_t vmx_pages_gva; + struct vmx_pages *vmx; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + vm_install_exception_handler(vm, BAD_IPI_VECTOR, bad_ipi_handler); + vm_install_exception_handler(vm, GOOD_IPI_VECTOR, good_ipi_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + } + + /* + * Verify at least two IRQs were injected. Unfortunately, KVM counts + * re-injected IRQs (e.g. if delivering the IRQ hits an EPT violation), + * so being more precise isn't possible given the current stats. + */ + TEST_ASSERT(vcpu_get_stat(vcpu, irq_injections) >= 2, + "Wanted at least 2 IRQ injections, got %lu\n", + vcpu_get_stat(vcpu, irq_injections)); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c deleted file mode 100644 index fa512d033205..000000000000 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ /dev/null @@ -1,179 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * KVM dirty page logging test - * - * Copyright (C) 2018, Red Hat, Inc. - */ -#include <stdio.h> -#include <stdlib.h> -#include <linux/bitmap.h> -#include <linux/bitops.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" -#include "vmx.h" - -/* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -#define TEST_MEM_PAGES 3 - -/* L1 guest test virtual memory offset */ -#define GUEST_TEST_MEM 0xc0000000 - -/* L2 guest test virtual memory offset */ -#define NESTED_TEST_MEM1 0xc0001000 -#define NESTED_TEST_MEM2 0xc0002000 - -static void l2_guest_code(u64 *a, u64 *b) -{ - READ_ONCE(*a); - WRITE_ONCE(*a, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - WRITE_ONCE(*b, 1); - GUEST_SYNC(true); - GUEST_SYNC(false); - - /* Exit to L1 and never come back. */ - vmcall(); -} - -static void l2_guest_code_ept_enabled(void) -{ - l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); -} - -static void l2_guest_code_ept_disabled(void) -{ - /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ - l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); -} - -void l1_guest_code(struct vmx_pages *vmx) -{ -#define L2_GUEST_STACK_SIZE 64 - unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - void *l2_rip; - - GUEST_ASSERT(vmx->vmcs_gpa); - GUEST_ASSERT(prepare_for_vmx_operation(vmx)); - GUEST_ASSERT(load_vmcs(vmx)); - - if (vmx->eptp_gpa) - l2_rip = l2_guest_code_ept_enabled; - else - l2_rip = l2_guest_code_ept_disabled; - - prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - - GUEST_SYNC(false); - GUEST_ASSERT(!vmlaunch()); - GUEST_SYNC(false); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_DONE(); -} - -static void test_vmx_dirty_log(bool enable_ept) -{ - vm_vaddr_t vmx_pages_gva = 0; - struct vmx_pages *vmx; - unsigned long *bmap; - uint64_t *host_test_mem; - - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct ucall uc; - bool done = false; - - pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); - - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); - - /* Add an extra memory slot for testing dirty logging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - GUEST_TEST_MEM, - TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, - KVM_MEM_LOG_DIRTY_PAGES); - - /* - * Add an identity map for GVA range [0xc0000000, 0xc0002000). This - * affects both L1 and L2. However... - */ - virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES); - - /* - * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to - * 0xc0000000. - * - * Note that prepare_eptp should be called only L1's GPA map is done, - * meaning after the last call to virt_map. - * - * When EPT is disabled, the L2 guest code will still access the same L1 - * GPAs as the EPT enabled case. - */ - if (enable_ept) { - prepare_eptp(vmx, vm, 0); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); - } - - bmap = bitmap_zalloc(TEST_MEM_PAGES); - host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); - - while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - /* NOT REACHED */ - case UCALL_SYNC: - /* - * The nested guest wrote at offset 0x1000 in the memslot, but the - * dirty bitmap must be filled in according to L1 GPA, not L2. - */ - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); - } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); - } - - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); - break; - case UCALL_DONE: - done = true; - break; - default: - TEST_FAIL("Unknown ucall %lu", uc.cmd); - } - } -} - -int main(int argc, char *argv[]) -{ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - - test_vmx_dirty_log(/*enable_ept=*/false); - - if (kvm_cpu_has_ept()) - test_vmx_dirty_log(/*enable_ept=*/true); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c index a100ee5f0009..a2eaceed9ad5 100644 --- a/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c @@ -52,7 +52,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + gva_t vmx_pages_gva; struct kvm_sregs sregs; struct kvm_vcpu *vcpu; struct kvm_run *run; diff --git a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c index 90720b6205f4..c1e8632a1bb6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_msrs_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c @@ -12,11 +12,10 @@ #include "kvm_util.h" #include "vmx.h" -static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) +static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask) { - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; + u64 val = vcpu_get_msr(vcpu, msr_index); + u64 bit; mask &= val; @@ -26,11 +25,10 @@ static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, } } -static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, - uint64_t mask) +static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, u32 msr_index, u64 mask) { - uint64_t val = vcpu_get_msr(vcpu, msr_index); - uint64_t bit; + u64 val = vcpu_get_msr(vcpu, msr_index); + u64 bit; mask = ~mask | val; @@ -40,7 +38,7 @@ static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, } } -static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index) +static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, u32 msr_index) { vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0)); vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32)); @@ -68,10 +66,10 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) } static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, - uint64_t msr_bit, + u64 msr_bit, struct kvm_x86_cpu_feature feature) { - uint64_t val; + u64 val; vcpu_clear_cpuid_feature(vcpu, feature); @@ -90,7 +88,7 @@ static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu, static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu) { - uint64_t supported_bits = FEAT_CTL_LOCKED | + u64 supported_bits = FEAT_CTL_LOCKED | FEAT_CTL_VMX_ENABLED_INSIDE_SMX | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | FEAT_CTL_SGX_LC_ENABLED | diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c new file mode 100644 index 000000000000..f13dee317383 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025, Google LLC. + * + * Test KVM's ability to save and restore nested state when the L1 guest + * is using 5-level paging and the L2 guest is using 4-level paging. + * + * This test would have failed prior to commit 9245fd6b8531 ("KVM: x86: + * model canonical checks more precisely"). + */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define LA57_GS_BASE 0xff2bc0311fb00000ull + +static void l2_guest_code(void) +{ + /* + * Sync with L0 to trigger save/restore. After + * resuming, execute VMCALL to exit back to L1. + */ + GUEST_SYNC(1); + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + u64 guest_cr4; + gpa_t pml5_pa, pml4_pa; + u64 *pml5; + u64 exit_reason; + + /* Set GS_BASE to a value that is only canonical with LA57. */ + wrmsr(MSR_GS_BASE, LA57_GS_BASE); + GUEST_ASSERT(rdmsr(MSR_GS_BASE) == LA57_GS_BASE); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Set up L2 with a 4-level page table by pointing its CR3 to + * L1's first PML4 table and clearing CR4.LA57. This creates + * the CR4.LA57 mismatch that exercises the bug. + */ + pml5_pa = get_cr3() & PHYSICAL_PAGE_MASK; + pml5 = (u64 *)pml5_pa; + pml4_pa = pml5[0] & PHYSICAL_PAGE_MASK; + vmwrite(GUEST_CR3, pml4_pa); + + guest_cr4 = vmreadz(GUEST_CR4); + guest_cr4 &= ~X86_CR4_LA57; + vmwrite(GUEST_CR4, guest_cr4); + + GUEST_ASSERT(!vmlaunch()); + + exit_reason = vmreadz(VM_EXIT_REASON); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + l1_guest_code(vmx_pages); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + gva_t vmx_pages_gva = 0; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_LA57)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* + * L1 needs to read its own PML5 table to set up L2. Identity map + * the PML5 table to facilitate this. + */ + virt_map(vm, vm->mmu.pgd, vm->mmu.pgd, 1); + + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + + TEST_ASSERT(uc.args[1] == stage, + "Expected stage %d, got stage %lu", stage, (ulong)uc.args[1]); + if (stage == 1) { + pr_info("L2 is active; performing save/restore.\n"); + state = vcpu_save_state(vcpu); + + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + kvm_x86_state_cleanup(state); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c index a1f5ff45d518..d004108dbdc6 100644 --- a/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c @@ -29,7 +29,7 @@ static union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; - u64 anythread_deprecated:1; + u64 pebs_timing_info:1; }; u64 capabilities; } host_cap; @@ -44,6 +44,7 @@ static const union perf_capabilities immutable_caps = { .pebs_arch_reg = 1, .pebs_format = -1, .pebs_baseline = 1, + .pebs_timing_info = 1, }; static const union perf_capabilities format_caps = { @@ -51,16 +52,16 @@ static const union perf_capabilities format_caps = { .pebs_format = -1, }; -static void guest_test_perf_capabilities_gp(uint64_t val) +static void guest_test_perf_capabilities_gp(u64 val) { - uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + u8 vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%lx', got vector '0x%x'", - val, vector); + "Expected #GP for value '0x%lx', got %s", + val, ex_str(vector)); } -static void guest_code(uint64_t current_val) +static void guest_code(u64 current_val) { int i; @@ -128,7 +129,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code) KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) { - const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; + const u64 fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; int bit; for_each_set_bit(bit, &fungible_caps, 64) { @@ -147,7 +148,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code) */ KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code) { - const uint64_t reserved_caps = (~host_cap.capabilities | + const u64 reserved_caps = (~host_cap.capabilities | immutable_caps.capabilities) & ~format_caps.capabilities; union perf_capabilities val = host_cap; @@ -209,7 +210,7 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) { - uint64_t val; + u64 val; int i, r; vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c index 00dd2ac07a61..1b7b6ba23de7 100644 --- a/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c @@ -152,7 +152,7 @@ void guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva = 0; + gva_t vmx_pages_gva = 0; struct kvm_regs regs1, regs2; struct kvm_vm *vm; diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c index a76078a08ff8..39ce9a9369f5 100644 --- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -48,20 +48,20 @@ * Incremented in the IPI handler. Provides evidence to the sender that the IPI * arrived at the destination */ -static volatile uint64_t ipis_rcvd; +static volatile u64 ipis_rcvd; /* Data struct shared between host main thread and vCPUs */ struct test_data_page { - uint32_t halter_apic_id; - volatile uint64_t hlt_count; - volatile uint64_t wake_count; - uint64_t ipis_sent; - uint64_t migrations_attempted; - uint64_t migrations_completed; - uint32_t icr; - uint32_t icr2; - uint32_t halter_tpr; - uint32_t halter_ppr; + u32 halter_apic_id; + volatile u64 hlt_count; + volatile u64 wake_count; + u64 ipis_sent; + u64 migrations_attempted; + u64 migrations_completed; + u32 icr; + u32 icr2; + u32 halter_tpr; + u32 halter_ppr; /* * Record local version register as a cross-check that APIC access @@ -69,19 +69,19 @@ struct test_data_page { * arch/x86/kvm/lapic.c). If test is failing, check that values match * to determine whether APIC access exits are working. */ - uint32_t halter_lvr; + u32 halter_lvr; }; struct thread_params { struct test_data_page *data; struct kvm_vcpu *vcpu; - uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ + u64 *pipis_rcvd; /* host address of ipis_rcvd global */ }; void verify_apic_base_addr(void) { - uint64_t msr = rdmsr(MSR_IA32_APICBASE); - uint64_t base = GET_APIC_BASE(msr); + u64 msr = rdmsr(MSR_IA32_APICBASE); + u64 base = GET_APIC_BASE(msr); GUEST_ASSERT(base == APIC_DEFAULT_GPA); } @@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data) data->halter_tpr = xapic_read_reg(APIC_TASKPRI); data->halter_ppr = xapic_read_reg(APIC_PROCPRI); data->hlt_count++; - asm volatile("sti; hlt; cli"); + safe_halt(); + cli(); data->wake_count++; } } @@ -124,12 +125,12 @@ static void guest_ipi_handler(struct ex_regs *regs) static void sender_guest_code(struct test_data_page *data) { - uint64_t last_wake_count; - uint64_t last_hlt_count; - uint64_t last_ipis_rcvd_count; - uint32_t icr_val; - uint32_t icr2_val; - uint64_t tsc_start; + u64 last_wake_count; + u64 last_hlt_count; + u64 last_ipis_rcvd_count; + u32 icr_val; + u32 icr2_val; + u64 tsc_start; verify_apic_base_addr(); xapic_enable(); @@ -247,7 +248,7 @@ static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) } void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, - uint64_t *pipis_rcvd) + u64 *pipis_rcvd) { long pages_not_moved; unsigned long nodemask = 0; @@ -255,20 +256,19 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, int nodes = 0; time_t start_time, last_update, now; time_t interval_secs = 1; - int i, r; + int i; int from, to; unsigned long bit; - uint64_t hlt_count; - uint64_t wake_count; - uint64_t ipis_sent; + u64 hlt_count; + u64 wake_count; + u64 ipis_sent; fprintf(stderr, "Calling migrate_pages every %d microseconds\n", delay_usecs); /* Get set of first 64 numa nodes available */ - r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + kvm_get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 0, MPOL_F_MEMS_ALLOWED); - TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " "(each 1-bit indicates node is present): %#lx\n", @@ -393,12 +393,12 @@ int main(int argc, char *argv[]) int run_secs = 0; int delay_usecs = 0; struct test_data_page *data; - vm_vaddr_t test_data_page_vaddr; + gva_t test_data_page_gva; bool migrate = false; pthread_t threads[2]; struct thread_params params[2]; struct kvm_vm *vm; - uint64_t *pipis_rcvd; + u64 *pipis_rcvd; get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); if (run_secs <= 0) @@ -414,16 +414,16 @@ int main(int argc, char *argv[]) params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); - test_data_page_vaddr = vm_vaddr_alloc_page(vm); - data = addr_gva2hva(vm, test_data_page_vaddr); + test_data_page_gva = vm_alloc_page(vm); + data = addr_gva2hva(vm, test_data_page_gva); memset(data, 0, sizeof(*data)); params[0].data = data; params[1].data = data; - vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); - vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); + vcpu_args_set(params[0].vcpu, 1, test_data_page_gva); + vcpu_args_set(params[1].vcpu, 1, test_data_page_gva); - pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); + pipis_rcvd = (u64 *)addr_gva2hva(vm, (u64)&ipis_rcvd); params[0].pipis_rcvd = pipis_rcvd; params[1].pipis_rcvd = pipis_rcvd; @@ -465,6 +465,19 @@ int main(int argc, char *argv[]) cancel_join_vcpu_thread(threads[0], params[0].vcpu); cancel_join_vcpu_thread(threads[1], params[1].vcpu); + /* + * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). + */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + fprintf(stderr, "Test successful after running for %d seconds.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c index 88bcca188799..637bb90c1d93 100644 --- a/tools/testing/selftests/kvm/x86/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c @@ -18,12 +18,12 @@ struct xapic_vcpu { static void xapic_guest_code(void) { - asm volatile("cli"); + cli(); xapic_enable(); while (1) { - uint64_t val = (u64)xapic_read_reg(APIC_IRR) | + u64 val = (u64)xapic_read_reg(APIC_IRR) | (u64)xapic_read_reg(APIC_IRR + 0x10) << 32; xapic_write_reg(APIC_ICR2, val >> 32); @@ -38,12 +38,12 @@ static void xapic_guest_code(void) static void x2apic_guest_code(void) { - asm volatile("cli"); + cli(); x2apic_enable(); do { - uint64_t val = x2apic_read_reg(APIC_IRR) | + u64 val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; if (val & X2APIC_RSVD_BITS_MASK) { @@ -56,12 +56,12 @@ static void x2apic_guest_code(void) } while (1); } -static void ____test_icr(struct xapic_vcpu *x, uint64_t val) +static void ____test_icr(struct xapic_vcpu *x, u64 val) { struct kvm_vcpu *vcpu = x->vcpu; struct kvm_lapic_state xapic; struct ucall uc; - uint64_t icr; + u64 icr; /* * Tell the guest what ICR value to write. Use the IRR to pass info, @@ -93,7 +93,7 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } -static void __test_icr(struct xapic_vcpu *x, uint64_t val) +static void __test_icr(struct xapic_vcpu *x, u64 val) { /* * The BUSY bit is reserved on both AMD and Intel, but only AMD treats @@ -109,7 +109,7 @@ static void __test_icr(struct xapic_vcpu *x, uint64_t val) static void test_icr(struct xapic_vcpu *x) { struct kvm_vcpu *vcpu = x->vcpu; - uint64_t icr, i, j; + u64 icr, i, j; icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; for (i = 0; i <= 0xff; i++) @@ -120,8 +120,8 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, icr | i); /* - * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of - * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff. + * Send all flavors of IPIs to non-existent vCPUs. Arbitrarily use + * vector 0xff. */ icr = APIC_INT_ASSERT | 0xff; for (i = 0; i < 0xff; i++) { @@ -142,9 +142,9 @@ static void test_icr(struct xapic_vcpu *x) __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK); } -static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base) +static void __test_apic_id(struct kvm_vcpu *vcpu, u64 apic_base) { - uint32_t apic_id, expected; + u32 apic_id, expected; struct kvm_lapic_state xapic; vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base); @@ -170,9 +170,9 @@ static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base) */ static void test_apic_id(void) { - const uint32_t NR_VCPUS = 3; + const u32 NR_VCPUS = 3; struct kvm_vcpu *vcpus[NR_VCPUS]; - uint64_t apic_base; + u64 apic_base; struct kvm_vm *vm; int i; @@ -248,7 +248,7 @@ int main(int argc, char *argv[]) * drops writes, AMD does not). Account for the errata when checking * that KVM reads back what was written. */ - x.has_xavic_errata = host_cpu_is_amd && + x.has_xavic_errata = host_cpu_is_amd_compatible && get_kvm_amd_param_bool("avic"); vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); diff --git a/tools/testing/selftests/kvm/x86/xapic_tpr_test.c b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c new file mode 100644 index 000000000000..ab25db2235d5 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xapic_tpr_test.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <fcntl.h> +#include <stdatomic.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +static bool is_x2apic; + +#define IRQ_VECTOR 0x20 + +/* See also the comment at similar assertion in memslot_perf_test.c */ +static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); + +static atomic_uint tpr_guest_irq_sync_val; + +static void tpr_guest_irq_sync_flag_reset(void) +{ + atomic_store_explicit(&tpr_guest_irq_sync_val, 0, + memory_order_release); +} + +static unsigned int tpr_guest_irq_sync_val_get(void) +{ + return atomic_load_explicit(&tpr_guest_irq_sync_val, + memory_order_acquire); +} + +static void tpr_guest_irq_sync_val_inc(void) +{ + atomic_fetch_add_explicit(&tpr_guest_irq_sync_val, 1, + memory_order_acq_rel); +} + +static void tpr_guest_irq_handler_xapic(struct ex_regs *regs) +{ + tpr_guest_irq_sync_val_inc(); + + xapic_write_reg(APIC_EOI, 0); +} + +static void tpr_guest_irq_handler_x2apic(struct ex_regs *regs) +{ + tpr_guest_irq_sync_val_inc(); + + x2apic_write_reg(APIC_EOI, 0); +} + +static void tpr_guest_irq_queue(void) +{ + if (is_x2apic) { + x2apic_write_reg(APIC_SELF_IPI, IRQ_VECTOR); + } else { + u32 icr, icr2; + + icr = APIC_DEST_SELF | APIC_DEST_PHYSICAL | APIC_DM_FIXED | + IRQ_VECTOR; + icr2 = 0; + + xapic_write_reg(APIC_ICR2, icr2); + xapic_write_reg(APIC_ICR, icr); + } +} + +static u8 tpr_guest_tpr_get(void) +{ + u32 taskpri; + + if (is_x2apic) + taskpri = x2apic_read_reg(APIC_TASKPRI); + else + taskpri = xapic_read_reg(APIC_TASKPRI); + + return GET_APIC_PRI(taskpri); +} + +static u8 tpr_guest_ppr_get(void) +{ + u32 procpri; + + if (is_x2apic) + procpri = x2apic_read_reg(APIC_PROCPRI); + else + procpri = xapic_read_reg(APIC_PROCPRI); + + return GET_APIC_PRI(procpri); +} + +static u8 tpr_guest_cr8_get(void) +{ + u64 cr8; + + asm volatile ("mov %%cr8, %[cr8]\n\t" : [cr8] "=r"(cr8)); + + return cr8 & GENMASK(3, 0); +} + +static void tpr_guest_check_tpr_ppr_cr8_equal(void) +{ + u8 tpr; + + tpr = tpr_guest_tpr_get(); + + GUEST_ASSERT_EQ(tpr_guest_ppr_get(), tpr); + GUEST_ASSERT_EQ(tpr_guest_cr8_get(), tpr); +} + +static void tpr_guest_code(void) +{ + cli(); + + if (is_x2apic) + x2apic_enable(); + else + xapic_enable(); + + GUEST_ASSERT_EQ(tpr_guest_tpr_get(), 0); + tpr_guest_check_tpr_ppr_cr8_equal(); + + tpr_guest_irq_queue(); + + /* TPR = 0 but IRQ masked by IF=0, should not fire */ + udelay(1000); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 0); + + sti(); + + /* IF=1 now, IRQ should fire */ + while (tpr_guest_irq_sync_val_get() == 0) + cpu_relax(); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); + + GUEST_SYNC(true); + tpr_guest_check_tpr_ppr_cr8_equal(); + + tpr_guest_irq_queue(); + + /* IRQ masked by barely high enough TPR now, should not fire */ + udelay(1000); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 1); + + GUEST_SYNC(false); + tpr_guest_check_tpr_ppr_cr8_equal(); + + /* TPR barely low enough now to unmask IRQ, should fire */ + while (tpr_guest_irq_sync_val_get() == 1) + cpu_relax(); + GUEST_ASSERT_EQ(tpr_guest_irq_sync_val_get(), 2); + + GUEST_DONE(); +} + +static u8 lapic_tpr_get(struct kvm_lapic_state *xapic) +{ + return GET_APIC_PRI(*((u32 *)&xapic->regs[APIC_TASKPRI])); +} + +static void lapic_tpr_set(struct kvm_lapic_state *xapic, u8 val) +{ + u32 *taskpri = (u32 *)&xapic->regs[APIC_TASKPRI]; + + *taskpri = SET_APIC_PRI(*taskpri, val); +} + +static u8 sregs_tpr(struct kvm_sregs *sregs) +{ + return sregs->cr8 & GENMASK(3, 0); +} + +static void test_tpr_check_tpr_zero(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic_state xapic; + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + + TEST_ASSERT_EQ(lapic_tpr_get(&xapic), 0); +} + +static void test_tpr_check_tpr_cr8_equal(struct kvm_vcpu *vcpu) +{ + struct kvm_sregs sregs; + struct kvm_lapic_state xapic; + + vcpu_sregs_get(vcpu, &sregs); + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + + TEST_ASSERT_EQ(sregs_tpr(&sregs), lapic_tpr_get(&xapic)); +} + +static void test_tpr_set_tpr_for_irq(struct kvm_vcpu *vcpu, bool mask) +{ + struct kvm_lapic_state xapic; + u8 tpr; + + static_assert(IRQ_VECTOR >= 16, "invalid IRQ vector number"); + tpr = IRQ_VECTOR / 16; + if (!mask) + tpr--; + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); + lapic_tpr_set(&xapic, tpr); + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); +} + +static void test_tpr(bool __is_x2apic) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool done = false; + + is_x2apic = __is_x2apic; + + vm = vm_create_with_one_vcpu(&vcpu, tpr_guest_code); + if (is_x2apic) { + vm_install_exception_handler(vm, IRQ_VECTOR, + tpr_guest_irq_handler_x2apic); + } else { + vm_install_exception_handler(vm, IRQ_VECTOR, + tpr_guest_irq_handler_xapic); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_X2APIC); + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + } + + sync_global_to_guest(vcpu->vm, is_x2apic); + + /* According to the SDM/APM the TPR value at reset is 0 */ + test_tpr_check_tpr_zero(vcpu); + test_tpr_check_tpr_cr8_equal(vcpu); + + tpr_guest_irq_sync_flag_reset(); + sync_global_to_guest(vcpu->vm, tpr_guest_irq_sync_val); + + while (!done) { + struct ucall uc; + + alarm(2); + vcpu_run(vcpu); + alarm(0); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + test_tpr_check_tpr_cr8_equal(vcpu); + done = true; + break; + case UCALL_SYNC: + test_tpr_check_tpr_cr8_equal(vcpu); + test_tpr_set_tpr_for_irq(vcpu, uc.args[1]); + break; + default: + TEST_FAIL("Unknown ucall result 0x%lx", uc.cmd); + break; + } + } + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + /* + * Use separate VMs for the xAPIC and x2APIC tests so that x2APIC can + * be fully hidden from the guest. KVM disallows changing CPUID after + * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. + */ + test_tpr(false); + test_tpr(true); +} diff --git a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c index c8a5c5e51661..40dc9e6b3fad 100644 --- a/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c @@ -21,7 +21,7 @@ */ #define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + u64 __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ \ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ __supported == ((xfeatures) | (dependencies)), \ @@ -39,7 +39,7 @@ do { \ */ #define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \ do { \ - uint64_t __supported = (supported_xcr0) & (xfeatures); \ + u64 __supported = (supported_xcr0) & (xfeatures); \ \ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ "supported = 0x%lx, xfeatures = 0x%llx", \ @@ -48,8 +48,8 @@ do { \ static void guest_code(void) { - uint64_t initial_xcr0; - uint64_t supported_xcr0; + u64 initial_xcr0; + u64 supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); @@ -81,13 +81,13 @@ static void guest_code(void) vector = xsetbv_safe(0, XFEATURE_MASK_FP); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(FP), got vector '0x%x'", - vector); + "Expected success on XSETBV(FP), got %s", + ex_str(vector)); vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%lx), got vector '0x%x'", - supported_xcr0, vector); + "Expected success on XSETBV(0x%lx), got %s", + supported_xcr0, ex_str(vector)); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) @@ -95,8 +95,8 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", - BIT_ULL(i), supported_xcr0, vector); + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got %s", + BIT_ULL(i), supported_xcr0, ex_str(vector)); } GUEST_DONE(); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index a59b3c799bb2..5076f6a75455 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -116,15 +116,15 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); struct vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; /* Extra field for overrun check */ + u32 state; + u64 state_entry_time; + u64 time[5]; /* Extra field for overrun check */ }; struct compat_vcpu_runstate_info { - uint32_t state; - uint64_t state_entry_time; - uint64_t time[5]; + u32 state; + u64 state_entry_time; + u64 time[5]; } __attribute__((__packed__)); struct arch_vcpu_info { @@ -133,8 +133,8 @@ struct arch_vcpu_info { }; struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; + u8 evtchn_upcall_pending; + u8 evtchn_upcall_mask; unsigned long evtchn_pending_sel; struct arch_vcpu_info arch; struct pvclock_vcpu_time_info time; @@ -145,7 +145,7 @@ struct shared_info { unsigned long evtchn_pending[64]; unsigned long evtchn_mask[64]; struct pvclock_wall_clock wc; - uint32_t wc_sec_hi; + u32 wc_sec_hi; /* arch_shared_info here */ }; @@ -191,10 +191,7 @@ static void guest_code(void) struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; int i; - __asm__ __volatile__( - "sti\n" - "nop\n" - ); + sti_nop(); /* Trigger an interrupt injection */ GUEST_SYNC(TEST_INJECT_VECTOR); @@ -550,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -575,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; @@ -674,7 +658,7 @@ int main(int argc, char *argv[]) printf("Testing RUNSTATE_ADJUST\n"); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; memset(&rst.u, 0, sizeof(rst.u)); - rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.state = (u64)-1; rst.u.runstate.time_blocked = 0x5a - rs->time[RUNSTATE_blocked]; rst.u.runstate.time_offline = @@ -1129,7 +1113,7 @@ int main(int argc, char *argv[]) /* Don't change the address, just trigger a write */ struct kvm_xen_vcpu_attr adj = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST, - .u.runstate.state = (uint64_t)-1 + .u.runstate.state = (u64)-1 }; vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj); diff --git a/tools/testing/selftests/kvm/x86/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c index f331a4e9bae3..12c63df6bbce 100644 --- a/tools/testing/selftests/kvm/x86/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c @@ -17,7 +17,7 @@ int main(int argc, char *argv[]) bool xss_in_msr_list; struct kvm_vm *vm; struct kvm_vcpu *vcpu; - uint64_t xss_val; + u64 xss_val; int i, r; /* Create VM */ |
