summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/kvm')
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm46
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c1
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c46
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c40
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c523
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h67
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h33
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h50
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c28
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c60
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c119
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c8
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c81
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c6
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c108
-rw-r--r--tools/testing/selftests/kvm/x86/nested_emulation_test.c146
-rw-r--r--tools/testing/selftests/kvm/x86/nx_huge_pages_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c158
-rw-r--r--tools/testing/selftests/kvm/x86/svm_int_ctl_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/ucna_injection_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c16
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/xen_shinfo_test.c5
29 files changed, 997 insertions, 604 deletions
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4277b983cace..f62b0a5aba35 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
@@ -69,6 +79,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
TEST_GEN_PROGS_x86 += x86/platform_info_test
TEST_GEN_PROGS_x86 += x86/pmu_counters_test
@@ -118,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
TEST_GEN_PROGS_x86 += memslot_modification_stress_test
TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
@@ -157,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -181,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += steal_time
SPLIT_TESTS += arch_timer
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..447e619cf856 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
case ITERATION_MARK_IDLE:
mark_vcpu_memory_idle(vm, vcpu_args);
break;
- };
+ }
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
}
diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d43fb3f49050..d01798b6b3b4 100644
--- a/tools/testing/selftests/kvm/arm64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -332,6 +332,7 @@ static __u64 base_regs[] = {
KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(3), /* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/arm64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index ec54ec7726e9..44cfcf8a7f46 100644
--- a/tools/testing/selftests/kvm/arm64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -21,22 +21,31 @@
#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX 1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
struct kvm_fw_reg_info {
uint64_t reg; /* Register definition */
uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+ uint64_t reset_val; /* Reset value for the register */
};
#define FW_REG_INFO(r) \
{ \
.reg = r, \
.max_feat_bit = r##_BIT_MAX, \
+ .reset_val = r##_RESET_VAL \
}
static const struct kvm_fw_reg_info fw_reg_info[] = {
FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
};
enum test_stage {
@@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+ uint64_t set_val;
- /* First 'read' should be an upper limit of the features supported */
+ /* First 'read' should be the reset value for the reg */
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
- "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
- reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+ TEST_ASSERT(val == reg_info->reset_val,
+ "Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, reg_info->reset_val, val);
+
+ if (reg_info->reset_val)
+ set_val = 0;
+ else
+ set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
- /* Test a 'write' by disabling all the features of the register map */
- ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
TEST_ASSERT(ret == 0,
- "Failed to clear all the features of reg: 0x%lx; ret: %d",
- reg_info->reg, errno);
+ "Failed to %s all the features of reg: 0x%lx; ret: %d",
+ (set_val ? "set" : "clear"), reg_info->reg, errno);
val = vcpu_get_reg(vcpu, reg_info->reg);
- TEST_ASSERT(val == 0,
- "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+ TEST_ASSERT(val == set_val,
+ "Expected all the features to be %s for reg: 0x%lx",
+ (set_val ? "set" : "cleared"), reg_info->reg);
+
+ /*
+ * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+ * expects it to be cleared.
+ */
+ if (set_val) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+ }
/*
* Test enabling a feature that's not supported.
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..dc6559dad9d8 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -199,7 +199,7 @@ static bool guest_set_ha(void)
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 217541fe6536..322b9d3b0125 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -146,6 +146,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
@@ -230,6 +233,9 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
GUEST_REG_SYNC(SYS_CTR_EL0);
+ GUEST_REG_SYNC(SYS_MIDR_EL1);
+ GUEST_REG_SYNC(SYS_REVIDR_EL1);
+ GUEST_REG_SYNC(SYS_AIDR_EL1);
GUEST_DONE();
}
@@ -609,18 +615,31 @@ static void test_ctr(struct kvm_vcpu *vcpu)
test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
}
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
{
u64 val;
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+ test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
test_clidr(vcpu);
test_ctr(vcpu);
- val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
- val++;
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+ test_id_reg(vcpu, SYS_MPIDR_EL1);
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ test_id_reg(vcpu, SYS_MIDR_EL1);
+ test_id_reg(vcpu, SYS_REVIDR_EL1);
+ test_id_reg(vcpu, SYS_AIDR_EL1);
- test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
ksft_test_result_pass("%s\n", __func__);
}
@@ -647,6 +666,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+ test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
ksft_test_result_pass("%s\n", __func__);
}
@@ -660,8 +682,11 @@ int main(void)
int test_cnt;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
@@ -675,13 +700,14 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
- ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+ ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
MPAM_IDREG_TEST;
ksft_set_plan(test_cnt);
test_vm_ftr_id_regs(vcpu, aarch64_only);
test_vcpu_ftr_id_regs(vcpu);
+ test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..23593d9eeba9 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
/* Default guest test virtual memory offset */
#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP 1024
-
/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N 32UL
/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL 10UL
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION 0x100
+
/* Dirty bitmaps are always little endian, so we need to swap on big endian */
#if defined(__s390x__)
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
@@ -75,6 +78,8 @@ static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
/*
* Guest physical memory offset of the testing memory slot.
@@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
static void guest_code(void)
{
uint64_t addr;
- int i;
+
+#ifdef __s390x__
+ uint64_t i;
/*
* On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,16 +114,19 @@ static void guest_code(void)
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
+#endif
while (true) {
- for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+ nr_writes++;
}
GUEST_SYNC(1);
@@ -133,25 +143,18 @@ static uint64_t host_num_pages;
/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread. It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
/*
* This is updated by the vcpu thread to tell the host whether it's a
* ring-full event. It should only be read until a sem_wait() of
* sem_vcpu_stop and before vcpu continues to run.
*/
static bool dirty_ring_vcpu_ring_full;
+
/*
* This is only used for verifying the dirty pages. Dirty ring has a very
* tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
* dirty gfn we've collected, so that if a mismatch of data found later in the
* verifying process, we let it pass.
*/
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU: Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT
+ * - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ * and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU: Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ * Resets the dirty ring which:
+ * - Resets the A/D bits in EPT (again)
+ * - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU: Memory write is logged in the PML and then committed.
+ * Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ * in the dirty ring, and since the write is usually not
+ * the last entry in the dirty-ring and has a very outdated
+ * iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode;
static pthread_t vcpu_thread;
static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
-static void vcpu_kick(void)
-{
- pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
- int ret;
-
- do
- ret = sem_wait(sem);
- while (ret == -1 && errno == EINTR);
-}
-
static bool clear_log_supported(void)
{
return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
/* Should only be called after a GUEST_SYNC */
static void vcpu_handle_sync_stop(void)
{
- if (atomic_read(&vcpu_sync_stop_requested)) {
- /* It means main thread is sleeping waiting */
- atomic_set(&vcpu_sync_stop_requested, false);
+ if (READ_ONCE(vcpu_stop)) {
sem_post(&sem_vcpu_stop);
- sem_wait_until(&sem_vcpu_cont);
+ sem_wait(&sem_vcpu_cont);
}
}
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
- TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
- "vcpu run failed: errno=%d", err);
-
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
"Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
"%u != %u", cur->slot, slot);
TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
"0x%llx >= 0x%x", cur->offset, num_pages);
- //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
__set_bit_le(cur->offset, bitmap);
dirty_ring_last_page = cur->offset;
dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
return count;
}
-static void dirty_ring_wait_vcpu(void)
-{
- /* This makes sure that hardware PML cache flushed */
- vcpu_kick();
- sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
- pr_info("Notifying vcpu to continue\n");
- sem_post(&sem_vcpu_cont);
-}
-
static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx)
{
- uint32_t count = 0, cleared;
- bool continued_vcpu = false;
-
- dirty_ring_wait_vcpu();
-
- if (!dirty_ring_vcpu_ring_full) {
- /*
- * This is not a ring-full event, it's safe to allow
- * vcpu to continue
- */
- dirty_ring_continue_vcpu();
- continued_vcpu = true;
- }
+ uint32_t count, cleared;
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
"with collected (%u)", cleared, count);
-
- if (!continued_vcpu) {
- TEST_ASSERT(dirty_ring_vcpu_ring_full,
- "Didn't continue vcpu even without ring full");
- dirty_ring_continue_vcpu();
- }
-
- pr_info("Iteration %ld collected %u pages\n", iteration, count);
}
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
/* A ucall-sync or ring-full event is allowed */
if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
- /* We should allow this to continue */
- ;
- } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
- (ret == -1 && err == EINTR)) {
- /* Update the flag first before pause */
- WRITE_ONCE(dirty_ring_vcpu_ring_full,
- run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
- sem_post(&sem_vcpu_stop);
- pr_info("vcpu stops because %s...\n",
- dirty_ring_vcpu_ring_full ?
- "dirty ring is full" : "vcpu is kicked out");
- sem_wait_until(&sem_vcpu_cont);
- pr_info("vcpu continues now.\n");
+ vcpu_handle_sync_stop();
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+ vcpu_handle_sync_stop();
} else {
TEST_ASSERT(false, "Invalid guest sync status: "
"exit_reason=%s",
@@ -426,7 +407,7 @@ struct log_mode {
void *bitmap, uint32_t num_pages,
uint32_t *ring_buf_idx);
/* Hook to call when after each vcpu run */
- void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
},
};
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration. For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
static void log_modes_dump(void)
{
int i;
@@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
}
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
{
struct log_mode *mode = &log_modes[host_log_mode];
if (mode->after_vcpu_run)
- mode->after_vcpu_run(vcpu, ret, err);
+ mode->after_vcpu_run(vcpu);
}
static void *vcpu_worker(void *data)
{
- int ret;
struct kvm_vcpu *vcpu = data;
- uint64_t pages_count = 0;
- struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
- + sizeof(sigset_t));
- sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- /*
- * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
- * ioctl to return with -EINTR, but it is still pending and we need
- * to accept it with the sigwait.
- */
- sigmask->len = 8;
- pthread_sigmask(0, NULL, sigset);
- sigdelset(sigset, SIG_IPI);
- vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
- sigemptyset(sigset);
- sigaddset(sigset, SIG_IPI);
+ sem_wait(&sem_vcpu_cont);
while (!READ_ONCE(host_quit)) {
- /* Clear any existing kick signals */
- pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = __vcpu_run(vcpu);
- if (ret == -1 && errno == EINTR) {
- int sig = -1;
- sigwait(sigset, &sig);
- assert(sig == SIG_IPI);
- }
- log_mode_after_vcpu_run(vcpu, ret, errno);
+ vcpu_run(vcpu);
+ log_mode_after_vcpu_run(vcpu);
}
- pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
return NULL;
}
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
{
+ uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
uint64_t step = vm_num_host_pages(mode, 1);
- uint64_t page;
- uint64_t *value_ptr;
- uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
- value_ptr = host_test_mem + page * host_page_size;
-
- /* If this is a special page that we were tracking... */
- if (__test_and_clear_bit_le(page, host_bmap_track)) {
- host_track_next_count++;
- TEST_ASSERT(test_bit_le(page, bmap),
- "Page %"PRIu64" should have its dirty bit "
- "set in this iteration but it is missing",
- page);
- }
+ uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+ bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
- if (__test_and_clear_bit_le(page, bmap)) {
- bool matched;
-
- host_dirty_count++;
+ /*
+ * Ensure both bitmaps are cleared, as a page can be written
+ * multiple times per iteration, i.e. can show up in both
+ * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+ * bitmap entries from previous collections.
+ */
+ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+ nr_dirty_pages++;
/*
- * If the bit is set, the value written onto
- * the corresponding page should be either the
- * previous iteration number or the current one.
+ * If the page is dirty, the value written to memory
+ * should be the current iteration number.
*/
- matched = (*value_ptr == iteration ||
- *value_ptr == iteration - 1);
-
- if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
- if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
- /*
- * Short answer: this case is special
- * only for dirty ring test where the
- * page is the last page before a kvm
- * dirty ring full in iteration N-2.
- *
- * Long answer: Assuming ring size R,
- * one possible condition is:
- *
- * main thr vcpu thr
- * -------- --------
- * iter=1
- * write 1 to page 0~(R-1)
- * full, vmexit
- * collect 0~(R-1)
- * kick vcpu
- * write 1 to (R-1)~(2R-2)
- * full, vmexit
- * iter=2
- * collect (R-1)~(2R-2)
- * kick vcpu
- * write 1 to (2R-2)
- * (NOTE!!! "1" cached in cpu reg)
- * write 2 to (2R-1)~(3R-3)
- * full, vmexit
- * iter=3
- * collect (2R-2)~(3R-3)
- * (here if we read value on page
- * "2R-2" is 1, while iter=3!!!)
- *
- * This however can only happen once per iteration.
- */
- min_iter = iteration - 1;
+ if (val == iteration)
+ continue;
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING) {
+ /*
+ * The last page in the ring from previous
+ * iteration can be written with the value
+ * from the previous iteration, as the value to
+ * be written may be cached in a CPU register.
+ */
+ if (page == dirty_ring_prev_iteration_last_page &&
+ val == iteration - 1)
continue;
- } else if (page == dirty_ring_last_page) {
- /*
- * Please refer to comments in
- * dirty_ring_last_page.
- */
+
+ /*
+ * Any value from a previous iteration is legal
+ * for the last entry, as the write may not yet
+ * have retired, i.e. the page may hold whatever
+ * it had before this iteration started.
+ */
+ if (page == dirty_ring_last_page &&
+ val < iteration)
continue;
- }
+ } else if (!val && iteration == 1 && bmap0_dirty) {
+ /*
+ * When testing get+clear, the dirty bitmap
+ * starts with all bits set, and so the first
+ * iteration can observe a "dirty" page that
+ * was never written, but only in the first
+ * bitmap (collecting the bitmap also clears
+ * all dirty pages).
+ */
+ continue;
}
- TEST_ASSERT(matched,
- "Set page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
+ TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
} else {
- host_clear_count++;
+ nr_clean_pages++;
/*
* If cleared, the value written can be any
- * value smaller or equals to the iteration
- * number. Note that the value can be exactly
- * (iteration-1) if that write can happen
- * like this:
- *
- * (1) increase loop count to "iteration-1"
- * (2) write to page P happens (with value
- * "iteration-1")
- * (3) get dirty log for "iteration-1"; we'll
- * see that page P bit is set (dirtied),
- * and not set the bit in host_bmap_track
- * (4) increase loop count to "iteration"
- * (which is current iteration)
- * (5) get dirty log for current iteration,
- * we'll see that page P is cleared, with
- * value "iteration-1".
+ * value smaller than the iteration number.
*/
- TEST_ASSERT(*value_ptr <= iteration,
- "Clear page %"PRIu64" value %"PRIu64
- " incorrect (iteration=%"PRIu64")",
- page, *value_ptr, iteration);
- if (*value_ptr == iteration) {
- /*
- * This page is _just_ modified; it
- * should report its dirtyness in the
- * next run
- */
- __set_bit_le(page, host_bmap_track);
- }
+ TEST_ASSERT(val < iteration,
+ "Clear page %lu value (%lu) >= iteration (%lu) "
+ "(last = %lu, prev_last = %lu)",
+ page, val, iteration, dirty_ring_last_page,
+ dirty_ring_prev_iteration_last_page);
}
}
+
+ pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+ iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+ host_dirty_count += nr_dirty_pages;
+ host_clear_count += nr_clean_pages;
}
static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
@@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct test_params *p = arg;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long *bmap[2];
uint32_t ring_buf_idx = 0;
int sem_val;
@@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
#ifdef __s390x__
/* Align to 1M (segment size) */
guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+ /*
+ * The workaround in guest_code() to write all pages prior to the first
+ * iteration isn't compatible with the dirty ring, as the dirty ring
+ * support relies on the vCPU to actually stop when vcpu_stop is set so
+ * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+ */
+ TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+ "Test needs to be updated to support s390 dirty ring");
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_zalloc(host_num_pages);
- host_bmap_track = bitmap_zalloc(host_num_pages);
+ bmap[0] = bitmap_zalloc(host_num_pages);
+ bmap[1] = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sync_global_to_guest(vm, guest_test_virt_mem);
sync_global_to_guest(vm, guest_num_pages);
- /* Start the iterations */
- iteration = 1;
- sync_global_to_guest(vm, iteration);
- WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
- host_track_next_count = 0;
- WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+ WRITE_ONCE(host_quit, false);
/*
* Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
+ TEST_ASSERT_EQ(vcpu_stop, false);
+
pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < p->iterations) {
- /* Give the vcpu thread some time to dirty some pages */
- usleep(p->interval * 1000);
- log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
- bmap, host_num_pages,
- &ring_buf_idx);
+ for (iteration = 1; iteration <= p->iterations; iteration++) {
+ unsigned long i;
+
+ sync_global_to_guest(vm, iteration);
+
+ WRITE_ONCE(nr_writes, 0);
+ sync_global_to_guest(vm, nr_writes);
+
+ dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+ sem_post(&sem_vcpu_cont);
+
+ /*
+ * Let the vCPU run beyond the configured interval until it has
+ * performed the minimum number of writes. This verifies the
+ * guest is making forward progress, e.g. isn't stuck because
+ * of a KVM bug, and puts a firm floor on test coverage.
+ */
+ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+ /*
+ * Sleep in 1ms chunks to keep the interval math simple
+ * and so that the test doesn't run too far beyond the
+ * specified interval.
+ */
+ usleep(1000);
+
+ sync_global_from_guest(vm, nr_writes);
+
+ /*
+ * Reap dirty pages while the guest is running so that
+ * dirty ring full events are resolved, i.e. so that a
+ * larger interval doesn't always end up with a vCPU
+ * that's effectively blocked. Collecting while the
+ * guest is running also verifies KVM doesn't lose any
+ * state.
+ *
+ * For bitmap modes, KVM overwrites the entire bitmap,
+ * i.e. collecting the bitmaps is destructive. Collect
+ * the bitmap only on the first pass, otherwise this
+ * test would lose track of dirty pages.
+ */
+ if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+ continue;
+
+ /*
+ * For the dirty ring, empty the ring on subsequent
+ * passes only if the ring was filled at least once,
+ * to verify KVM's handling of a full ring (emptying
+ * the ring on every pass would make it unlikely the
+ * vCPU would ever fill the fing).
+ */
+ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+ continue;
+
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[0], host_num_pages,
+ &ring_buf_idx);
+ }
+
+ /*
+ * Stop the vCPU prior to collecting and verifying the dirty
+ * log. If the vCPU is allowed to run during collection, then
+ * pages that are written during this iteration may be missed,
+ * i.e. collected in the next iteration. And if the vCPU is
+ * writing memory during verification, pages that this thread
+ * sees as clean may be written with this iteration's value.
+ */
+ WRITE_ONCE(vcpu_stop, true);
+ sync_global_to_guest(vm, vcpu_stop);
+ sem_wait(&sem_vcpu_stop);
/*
- * See vcpu_sync_stop_requested definition for details on why
- * we need to stop vcpu when verify data.
+ * Clear vcpu_stop after the vCPU thread has acknowledge the
+ * stop request and is waiting, i.e. is definitely not running!
*/
- atomic_set(&vcpu_sync_stop_requested, true);
- sem_wait_until(&sem_vcpu_stop);
+ WRITE_ONCE(vcpu_stop, false);
+ sync_global_to_guest(vm, vcpu_stop);
+
+ /*
+ * Sync the number of writes performed before verification, the
+ * info will be printed along with the dirty/clean page counts.
+ */
+ sync_global_from_guest(vm, nr_writes);
+
/*
* NOTE: for dirty ring, it's possible that we didn't stop at
* GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
* the flush of the last page, and since we handle the last
* page specially verification will succeed anyway.
*/
- assert(host_log_mode == LOG_MODE_DIRTY_RING ||
- atomic_read(&vcpu_sync_stop_requested) == false);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap[1], host_num_pages,
+ &ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
-
- /*
- * Set host_quit before sem_vcpu_cont in the final iteration to
- * ensure that the vCPU worker doesn't resume the guest. As
- * above, the dirty ring test may stop and wait even when not
- * explicitly request to do so, i.e. would hang waiting for a
- * "continue" if it's allowed to resume the guest.
- */
- if (++iteration == p->iterations)
- WRITE_ONCE(host_quit, true);
-
- sem_post(&sem_vcpu_cont);
- sync_global_to_guest(vm, iteration);
}
+ WRITE_ONCE(host_quit, true);
+ sem_post(&sem_vcpu_cont);
+
pthread_join(vcpu_thread, NULL);
- pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+ host_dirty_count, host_clear_count);
- free(bmap);
- free(host_bmap_track);
+ free(bmap[0]);
+ free(bmap[1]);
kvm_vm_free(vm);
}
@@ -857,7 +836,6 @@ int main(int argc, char *argv[])
.interval = TEST_HOST_LOOP_INTERVAL,
};
int opt, i;
- sigset_t sigset;
sem_init(&sem_vcpu_stop, 0, 0);
sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +886,12 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
p.iterations, p.interval);
- srandom(time(0));
-
- /* Ensure that vCPU threads start with SIG_IPI blocked. */
- sigemptyset(&sigset);
- sigaddset(&sigset, SIG_IPI);
- pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
if (host_log_mode_option == LOG_MODE_ALL) {
/* Run each log mode */
for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..b0fc0f945766 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,6 +62,67 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -102,12 +163,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 4c4e5a847f67..373912464fb4 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -46,6 +46,12 @@ struct userspace_mem_region {
struct hlist_node slot_node;
};
+struct kvm_binary_stats {
+ int fd;
+ struct kvm_stats_header header;
+ struct kvm_stats_desc *desc;
+};
+
struct kvm_vcpu {
struct list_head list;
uint32_t id;
@@ -55,6 +61,7 @@ struct kvm_vcpu {
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
+ struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
uint32_t fetch_index;
uint32_t dirty_gfns_count;
@@ -99,10 +106,7 @@ struct kvm_vm {
struct kvm_vm_arch arch;
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
+ struct kvm_binary_stats stats;
/*
* KVM region slots. These are the default memslots used by page
@@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
struct kvm_stats_desc *desc, uint64_t *data,
size_t max_elements);
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements);
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
- uint64_t data;
+#define __get_stat(stats, stat) \
+({ \
+ uint64_t data; \
+ \
+ kvm_get_stat(stats, #stat, &data, 1); \
+ data; \
+})
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
-}
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
void vm_create_irqchip(struct kvm_vm *vm);
@@ -963,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
void kvm_print_vcpu_pinning_help(void);
void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..77d13d7920cb 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -22,7 +22,7 @@
#define msecs_to_usecs(msec) ((msec) * 1000ULL)
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
#define pr_debug(...) printf(__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index d60da8966772..32ab6ca7ec32 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature {
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
/*
* KVM defined paravirt features.
@@ -285,6 +291,8 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -1244,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
uint64_t ign_error_code; \
uint8_t vector; \
\
- asm volatile(KVM_ASM_SAFE(insn) \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
: KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
: inputs \
: KVM_ASM_SAFE_CLOBBERS); \
@@ -1339,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
GUEST_ASSERT(!ret);
}
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+ asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+ asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+ asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+ asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+ asm volatile ("cli");
+}
+
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
#define vm_xsave_require_permission(xfeature) \
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include "test_util.h"
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- /*
- * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
- * an arbitrary number for everything else.
- */
- int nr_fds_wanted = kvm_max_vcpus + 100;
- struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
- /*
- * Check that we're allowed to open nr_fds_wanted file descriptors and
- * try raising the limits if needed.
- */
- TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
- if (rl.rlim_cur < nr_fds_wanted) {
- rl.rlim_cur = nr_fds_wanted;
- if (rl.rlim_max < nr_fds_wanted) {
- int old_rlim_max = rl.rlim_max;
- rl.rlim_max = nr_fds_wanted;
-
- int r = setrlimit(RLIMIT_NOFILE, &rl);
- __TEST_REQUIRE(r >= 0,
- "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
- old_rlim_max, nr_fds_wanted);
- } else {
- TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
- }
- }
+ kvm_set_files_rlimit(kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..9d69904cb608 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
uint64_t pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
@@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
uint64_t pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
@@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t flags)
{
- uint8_t attr_idx = flags & 7;
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
uint64_t *ptep;
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
@@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ tcr_el1 |= TCR_IPS_52_BITS;
ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 33fefeb3ca44..815bc45dd8dc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -12,6 +12,7 @@
#include <assert.h>
#include <sched.h>
#include <sys/mman.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm)
vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vm->stats.fd = vm_get_stats_fd(vm);
+ else
+ vm->stats.fd = -1;
}
const char *vm_guest_mode_string(uint32_t i)
@@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
return vm_adjust_num_guest_pages(mode, nr_pages);
}
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+ /*
+ * Each vCPU will open two file descriptors: the vCPU itself and the
+ * vCPU's binary stats file descriptor. Add an arbitrary amount of
+ * buffer for all other files a test may open.
+ */
+ int nr_fds_wanted = nr_vcpus * 2 + 100;
+ struct rlimit rl;
+
+ /*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+
+ rl.rlim_max = nr_fds_wanted;
+ __TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
struct kvm_vm *vm;
int i;
+ kvm_set_files_rlimit(nr_runnable_vcpus);
+
pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
vm_guest_mode_string(shape.mode), shape.type, nr_pages);
@@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
return NULL;
}
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+ int ret;
+
+ if (stats->fd < 0)
+ return;
+
+ if (stats->desc) {
+ free(stats->desc);
+ stats->desc = NULL;
+ }
+
+ ret = close(stats->fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ stats->fd = -1;
+}
+
__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
@@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
ret = close(vcpu->fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+ kvm_stats_release(&vcpu->stats);
+
list_del(&vcpu->list);
vcpu_arch_free(vcpu);
@@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
ret = close(vmp->kvm_fd);
TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
+
+ /* Free cached stats metadata and close FD */
+ kvm_stats_release(&vmp->stats);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
@@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
if (vmp == NULL)
return;
- /* Free cached stats metadata and close FD */
- if (vmp->stats_fd) {
- free(vmp->stats_desc);
- close(vmp->stats_fd);
- }
-
/* Free userspace_mem_regions. */
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region);
@@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
TEST_ASSERT(vcpu->run != MAP_FAILED,
__KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+ if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+ vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+ else
+ vcpu->stats.fd = -1;
+
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
@@ -1958,9 +2019,8 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
};
/*
@@ -2198,46 +2258,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
desc->name, size, ret);
}
-/*
- * Read the data of the named stat
- *
- * Input Args:
- * vm - the VM for which the stat should be read
- * stat_name - the name of the stat to read
- * max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- * data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+ uint64_t *data, size_t max_elements)
{
struct kvm_stats_desc *desc;
size_t size_desc;
int i;
- if (!vm->stats_fd) {
- vm->stats_fd = vm_get_stats_fd(vm);
- read_stats_header(vm->stats_fd, &vm->stats_header);
- vm->stats_desc = read_stats_descriptors(vm->stats_fd,
- &vm->stats_header);
+ if (!stats->desc) {
+ read_stats_header(stats->fd, &stats->header);
+ stats->desc = read_stats_descriptors(stats->fd, &stats->header);
}
- size_desc = get_stats_descriptor_size(&vm->stats_header);
+ size_desc = get_stats_descriptor_size(&stats->header);
- for (i = 0; i < vm->stats_header.num_desc; ++i) {
- desc = (void *)vm->stats_desc + (i * size_desc);
+ for (i = 0; i < stats->header.num_desc; ++i) {
+ desc = (void *)stats->desc + (i * size_desc);
- if (strcmp(desc->name, stat_name))
+ if (strcmp(desc->name, name))
continue;
- read_stat_data(vm->stats_fd, &vm->stats_header, desc,
- data, max_elements);
-
- break;
+ read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+ return;
}
+
+ TEST_FAIL("Unable to find stat '%s'", name);
}
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..5bde176cedd5 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
- is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+ is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8515921dfdbf..569f2d67c9b8 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -53,8 +53,10 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
@@ -434,8 +436,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
@@ -974,8 +978,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
@@ -1045,8 +1051,10 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svnapot,
&config_svpbmt,
&config_svvptc,
+ &config_zaamo,
&config_zabha,
&config_zacas,
+ &config_zalrsc,
&config_zawrs,
&config_zba,
&config_zbb,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f45c0ecc902d..03406de4989d 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
#define SBI_PMU_TEST_SNAPSHOT BIT(2)
#define SBI_PMU_TEST_OVERFLOW BIT(3)
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+ int disabled_tests;
+ int overflow_irqnum;
+};
+
+static struct test_args targs;
unsigned long pmu_csr_read_num(int csr_num)
{
@@ -118,8 +124,8 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
0, 0, 0);
- __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
- counter, ret.error);
+ __GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld error %ld\n", counter, ret.error);
}
static void guest_illegal_exception_handler(struct ex_regs *regs)
@@ -137,7 +143,6 @@ static void guest_irq_handler(struct ex_regs *regs)
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
unsigned long overflown_mask;
- unsigned long counter_val = 0;
/* Validate that we are in the correct irq handler */
GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +156,6 @@ static void guest_irq_handler(struct ex_regs *regs)
GUEST_ASSERT(overflown_mask & 0x01);
WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
- counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
- /* Now start the counter to mimick the real driver behavior */
- start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
}
static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -479,7 +480,7 @@ static void test_pmu_events_snaphost(void)
static void test_pmu_events_overflow(void)
{
- int num_counters = 0;
+ int num_counters = 0, i = 0;
/* Verify presence of SBI PMU and minimum requrired SBI version */
verify_sbi_requirement_assert();
@@ -496,11 +497,15 @@ static void test_pmu_events_overflow(void)
* Qemu supports overflow for cycle/instruction.
* This test may fail on any platform that do not support overflow for these two events.
*/
- test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+ vcpu_shared_irq_count = 0;
- test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
- GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+ for (i = 0; i < targs.overflow_irqnum; i++)
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
GUEST_DONE();
}
@@ -609,7 +614,11 @@ static void test_vm_events_overflow(void *guest_code)
vcpu_init_vector_tables(vcpu);
/* Initialize guest timer frequency. */
timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+ /* Export the shared variables to the guest */
sync_global_to_guest(vm, timer_freq);
+ sync_global_to_guest(vm, vcpu_shared_irq_count);
+ sync_global_to_guest(vm, targs);
run_vcpu(vcpu);
@@ -618,28 +627,38 @@ static void test_vm_events_overflow(void *guest_code)
static void test_print_help(char *name)
{
- pr_info("Usage: %s [-h] [-d <test name>]\n", name);
- pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+ name);
+ pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+ SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
pr_info("\t-h: print this help screen\n");
}
static bool parse_args(int argc, char *argv[])
{
int opt;
+ int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+ SBI_PMU_TEST_OVERFLOW;
+ int overflow_interrupts = 0;
- while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
switch (opt) {
- case 'd':
+ case 't':
if (!strncmp("basic", optarg, 5))
- disabled_tests |= SBI_PMU_TEST_BASIC;
+ temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
else if (!strncmp("events", optarg, 6))
- disabled_tests |= SBI_PMU_TEST_EVENTS;
+ temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
else if (!strncmp("snapshot", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
else if (!strncmp("overflow", optarg, 8))
- disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
else
goto done;
+ targs.disabled_tests = temp_disabled_tests;
+ break;
+ case 'n':
+ overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
break;
case 'h':
default:
@@ -647,6 +666,15 @@ static bool parse_args(int argc, char *argv[])
}
}
+ if (overflow_interrupts > 0) {
+ if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+ pr_info("-n option is only available for overflow test\n");
+ goto done;
+ } else {
+ targs.overflow_irqnum = overflow_interrupts;
+ }
+ }
+
return true;
done:
test_print_help(argv[0]);
@@ -655,25 +683,28 @@ done:
int main(int argc, char *argv[])
{
+ targs.disabled_tests = 0;
+ targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
if (!parse_args(argc, argv))
exit(KSFT_SKIP);
- if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
test_vm_basic_test(test_pmu_basic_sanity);
pr_info("SBI PMU basic test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
test_vm_events_test(test_pmu_events);
pr_info("SBI PMU event verification test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
test_vm_events_snapshot_test(test_pmu_events_snaphost);
pr_info("SBI PMU event verification with snapshot test : PASS\n");
}
- if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
test_vm_events_overflow(test_pmu_events_overflow);
pr_info("SBI PMU event verification with overflow test : PASS\n");
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..1375fca80bcd 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,25 +196,27 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -243,6 +245,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +294,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +312,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..b0d2b04a7ff2 100644
--- a/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -41,9 +41,9 @@ struct kvm_page_stats {
static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
{
- stats->pages_4k = vm_get_stat(vm, "pages_4k");
- stats->pages_2m = vm_get_stat(vm, "pages_2m");
- stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->pages_4k = vm_get_stat(vm, pages_4k);
+ stats->pages_2m = vm_get_stat(vm, pages_2m);
+ stats->pages_1g = vm_get_stat(vm, pages_1g);
stats->hugepages = stats->pages_2m + stats->pages_1g;
pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..2b5b4bc6ef7e 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
/* Signal sender vCPU we're ready */
ipis_rcvd[vcpu_id] = (u64)-1;
- for (;;)
- asm volatile("sti; hlt; cli");
+ for (;;) {
+ safe_halt();
+ cli();
+ }
}
static void guest_ipi_handler(struct ex_regs *regs)
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..390ae2d87493 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -35,11 +38,19 @@ do { \
testcase, vector); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,64 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
- TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+ SVM_F,
+ VMX_F,
+ NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+ const char name[32];
+ uint8_t opcode[15];
+ uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+ {
+ .name = "pause",
+ .opcode = { 0xf3, 0x90 },
+ .exit_reason = { SVM_EXIT_PAUSE,
+ EXIT_REASON_PAUSE_INSTRUCTION, }
+ },
+ {
+ .name = "hlt",
+ .opcode = { 0xf4 },
+ .exit_reason = { SVM_EXIT_HLT,
+ EXIT_REASON_HLT, }
+ },
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d }; /* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+ uint32_t i;
+
+ for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+ ;
+
+ return i;
+}
+
+static void guest_code(void *test_data)
+{
+ int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+ int i;
+
+ memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, NULL, NULL);
+ vmcb->save.idtr.limit = 0;
+ vmcb->save.rip = (u64)l2_guest_code;
+
+ vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+ BIT_ULL(INTERCEPT_PAUSE) |
+ BIT_ULL(INTERCEPT_HLT);
+ vmcb->control.intercept_exceptions = 0;
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+ GUEST_ASSERT(load_vmcs(test_data));
+
+ prepare_vmcs(test_data, NULL, NULL);
+ GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_PAUSE_EXITING |
+ CPU_BASED_HLT_EXITING);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+ struct emulated_instruction *insn = &instructions[i];
+ uint32_t insn_len = get_instruction_length(insn);
+ uint32_t exit_insn_len;
+ u32 exit_reason;
+
+ /*
+ * Copy the target instruction to the L2 code stream, and fill
+ * the remaining bytes with INT3s so that a missed intercept
+ * results in a consistent failure mode (SHUTDOWN).
+ */
+ memcpy(l2_instruction, insn->opcode, insn_len);
+ memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+ if (f == SVM_F) {
+ struct svm_test_data *svm = test_data;
+ struct vmcb *vmcb = svm->vmcb;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ exit_reason = vmcb->control.exit_code;
+ exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+ GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+ } else {
+ GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+ exit_reason = vmreadz(VM_EXIT_REASON);
+ exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+ GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+ }
+
+ __GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+ "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+ insn->exit_reason[f], insn->name, exit_reason);
+
+ __GUEST_ASSERT(exit_insn_len == insn_len,
+ "Wanted insn_len '%u' for '%s', got '%u'",
+ insn_len, insn->name, exit_insn_len);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..c0d84827f736 100644
--- a/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
int actual_pages_2m;
- actual_pages_2m = vm_get_stat(vm, "pages_2m");
+ actual_pages_2m = vm_get_stat(vm, pages_2m);
TEST_ASSERT(actual_pages_2m == expected_pages_2m,
"Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
int actual_splits;
- actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+ actual_splits = vm_get_stat(vm, nx_lpage_splits);
TEST_ASSERT(actual_splits == expected_splits,
"Unexpected NX huge page split count. Expected %d, got %d",
diff --git a/tools/testing/selftests/kvm/x86/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..8aaaf25b6111 100644
--- a/tools/testing/selftests/kvm/x86/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -17,7 +17,7 @@
* Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
* 1 LOOP.
*/
-#define NUM_INSNS_PER_LOOP 3
+#define NUM_INSNS_PER_LOOP 4
/*
* Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,10 +29,59 @@
/* Total number of instructions retired within the measured section. */
#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+ const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same as the
+ * general purpose architectural event. The fixed counter explicitly
+ * counts at the same frequency as the TSC, whereas the GP event counts
+ * at a fixed, but uarch specific, frequency. Bundle them here for
+ * simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+ return __intel_event_to_feature[idx];
+}
+
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
uint8_t pmu_version,
@@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
vm = vm_create_with_one_vcpu(vcpu, guest_code);
sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, hardware_pmu_arch_events);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void)
* Sanity check that in all cases, the event doesn't count when it's disabled,
* and that KVM correctly emulates the write of an arbitrary value.
*/
-static void guest_assert_event_count(uint8_t idx,
- struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
{
uint64_t count;
count = _rdpmc(pmc);
- if (!this_pmu_has(event))
+ if (!(hardware_pmu_arch_events & BIT(idx)))
goto sanity_checks;
switch (idx) {
@@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx,
GUEST_ASSERT_NE(count, 0);
break;
case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
- GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ __GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+ "Expected top-down slots >= %u, got count = %lu",
+ NUM_INSNS_RETIRED, count);
break;
default:
break;
@@ -162,75 +212,42 @@ do { \
"1:\n\t" \
clflush "\n\t" \
"mfence\n\t" \
+ "mov %[m], %%eax\n\t" \
FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
"wrmsr\n\t" \
:: "a"((uint32_t)_value), "d"(_value >> 32), \
- "c"(_msr), "D"(_msr) \
+ "c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version) \
); \
} while (0)
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
do { \
- wrmsr(pmc_msr, 0); \
+ wrmsr(_pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush %[m]", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
- guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+ guest_assert_event_count(_idx, _pmc, _pmc_msr); \
} while (0)
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
- uint32_t pmc, uint32_t pmc_msr,
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
if (is_forced_emulation_enabled)
- GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL \
-({ \
- struct kvm_x86_pmu_feature feature = {}; \
- \
- feature; \
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
- return !(*(u64 *)&event);
+ GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
}
static void guest_test_arch_event(uint8_t idx)
{
- const struct {
- struct kvm_x86_pmu_feature gp_event;
- struct kvm_x86_pmu_feature fixed_event;
- } intel_event_to_feature[] = {
- [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
- [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
- /*
- * Note, the fixed counter for reference cycles is NOT the same
- * as the general purpose architectural event. The fixed counter
- * explicitly counts at the same frequency as the TSC, whereas
- * the GP event counts at a fixed, but uarch specific, frequency.
- * Bundle them here for simplicity.
- */
- [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
- [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
- [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
- };
-
uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint32_t pmu_version = guest_get_pmu_version();
/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
@@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx)
else
base_pmc_msr = MSR_IA32_PERFCTR0;
- gp_event = intel_event_to_feature[idx].gp_event;
+ gp_event = intel_event_to_feature(idx).gp_event;
GUEST_ASSERT_EQ(idx, gp_event.f.bit);
GUEST_ASSERT(nr_gp_counters);
@@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx)
if (guest_has_perf_global_ctrl)
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
- __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ __guest_test_arch_event(idx, i, base_pmc_msr + i,
MSR_P6_EVNTSEL0 + i, eventsel);
}
if (!guest_has_perf_global_ctrl)
return;
- fixed_event = intel_event_to_feature[idx].fixed_event;
+ fixed_event = intel_event_to_feature(idx).fixed_event;
if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
return;
@@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
- __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ __guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
MSR_CORE_PERF_FIXED_CTR0 + i,
MSR_CORE_PERF_GLOBAL_CTRL,
FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
expect_gp ? "#GP" : "no fault", msr, vector) \
#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
- __GUEST_ASSERT(val == expected_val, \
+ __GUEST_ASSERT(val == expected, \
"Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
- msr, expected_val, val);
+ msr, expected, val);
static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
uint64_t expected_val)
@@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
static void test_intel_counters(void)
{
- uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
@@ -567,18 +583,26 @@ static void test_intel_counters(void)
/*
* Detect the existence of events that aren't supported by selftests.
- * This will (obviously) fail any time the kernel adds support for a
- * new event, but it's worth paying that price to keep the test fresh.
+ * This will (obviously) fail any time hardware adds support for a new
+ * event, but it's worth paying that price to keep the test fresh.
*/
- TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
"New architectural event(s) detected; please update this test (length = %u, mask = %x)",
- nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+ this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+ this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
/*
- * Force iterating over known arch events regardless of whether or not
- * KVM/hardware supports a given event.
+ * Iterate over known arch events irrespective of KVM/hardware support
+ * to verify that KVM doesn't reject programming of events just because
+ * the *architectural* encoding is unsupported. Track which events are
+ * supported in hardware; the guest side will validate supported events
+ * count correctly, even if *enumeration* of the event is unsupported
+ * by KVM and/or isn't exposed to the guest.
*/
- nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+ if (this_pmu_has(intel_event_to_feature(i).gp_event))
+ hardware_pmu_arch_events |= BIT(i);
+ }
for (v = 0; v <= max_pmu_version; v++) {
for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -594,8 +618,8 @@ static void test_intel_counters(void)
* vector length.
*/
if (v == pmu_version) {
- for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
- test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
+ test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
}
/*
* Test single bits for all PMU version and lengths up
@@ -604,11 +628,11 @@ static void test_intel_counters(void)
* host length). Explicitly test a mask of '0' and all
* ones i.e. all events being available and unavailable.
*/
- for (j = 0; j <= nr_arch_events + 1; j++) {
+ for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
test_arch_events(v, perf_caps[i], j, 0);
test_arch_events(v, perf_caps[i], j, 0xff);
- for (k = 0; k < nr_arch_events; k++)
+ for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
test_arch_events(v, perf_caps[i], j, BIT(k));
}
diff --git a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..917b6066cfc1 100644
--- a/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
x2apic_write_reg(APIC_ICR,
APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
GUEST_ASSERT(vintr_irq_called);
GUEST_ASSERT(intr_irq_called);
diff --git a/tools/testing/selftests/kvm/x86/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..1e5e564523b3 100644
--- a/tools/testing/selftests/kvm/x86/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
/* Enables interrupt in guest. */
- asm volatile("sti");
+ sti();
/* Let user space inject the first UCNA */
GUEST_SYNC(SYNC_FIRST_UCNA);
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..35cb9de54a82 100644
--- a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
- asm volatile("sti; hlt; cli");
+ safe_halt();
+ cli();
data->wake_count++;
}
}
@@ -465,6 +466,19 @@ int main(int argc, char *argv[])
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+ /*
+ * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+ * then the number of HLT exits may be less than the number of HLTs
+ * that were executed, as Idle HLT elides the exit if the vCPU has an
+ * unmasked, pending IRQ (or NMI).
+ */
+ if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+ TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+ "HLT insns = %lu, HLT exits = %lu",
+ data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+ else
+ TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..fdebff1165c7 100644
--- a/tools/testing/selftests/kvm/x86/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,7 +18,7 @@ struct xapic_vcpu {
static void xapic_guest_code(void)
{
- asm volatile("cli");
+ cli();
xapic_enable();
@@ -38,7 +38,7 @@ static void xapic_guest_code(void)
static void x2apic_guest_code(void)
{
- asm volatile("cli");
+ cli();
x2apic_enable();
diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..287829f850f7 100644
--- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -191,10 +191,7 @@ static void guest_code(void)
struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
int i;
- __asm__ __volatile__(
- "sti\n"
- "nop\n"
- );
+ sti_nop();
/* Trigger an interrupt injection */
GUEST_SYNC(TEST_INJECT_VECTOR);