diff options
Diffstat (limited to 'tools/testing/selftests/kvm')
38 files changed, 2480 insertions, 210 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..7f57abf936e7 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,7 @@ !*.h !*.S !*.sh +!.gitignore +!config +!settings +!Makefile diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..960cf6a77198 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -130,6 +130,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test +TEST_GEN_PROGS_x86_64 += coalesced_io_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test @@ -152,6 +153,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs +TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test @@ -163,8 +165,10 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access +TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer +TEST_GEN_PROGS_aarch64 += coalesced_io_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test @@ -186,6 +190,7 @@ TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test TEST_GEN_PROGS_s390x += 
s390x/shared_zeropage_test +TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += guest_print_test @@ -198,6 +203,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..a36a7e2db434 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprograming timers. + * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +static const uint64_t CVAL_MAX = ~0ULL; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* A nice counter value to use as the starting one for most tests. */ +static const uint64_t DEF_CNT = (CVAL_MAX / 2); + +/* Number of runs. */ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. 
*/ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. */ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = 
atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. 
*/ + timer_set_ctl(timer, CTL_IMASK); + + atomic_inc(&shared_data.handled); + +out: + gic_set_eoi(intid); +} + +static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_cval(timer, cval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_ctl(timer, ctl); + timer_set_tval(timer, tval_cycles); +} + +/* Program the timer through the requested view (TIMER_CVAL or TIMER_TVAL). */ +static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, + enum timer_view tv) +{ + switch (tv) { + case TIMER_CVAL: + set_cval_irq(timer, xval, ctl); + break; + case TIMER_TVAL: + set_tval_irq(timer, xval, ctl); + break; + default: + /* The bad value is the view selector, not the timer. */ + GUEST_FAIL("Invalid timer view %d", tv); + } +} + +/* + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void wait_for_non_spurious_irq(void) +{ + int h; + + local_irq_disable(); + + for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } +} + +/* + * Wait for a non-spurious IRQ by polling in the guest or in + * userspace (e.g. usp_cmd=USERSPACE_SCHED_YIELD). + * + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. 
+ */ +static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) +{ + int h; + + local_irq_disable(); + + h = atomic_read(&shared_data.handled); + + local_irq_enable(); + while (h == atomic_read(&shared_data.handled)) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } + local_irq_disable(); +} + +static void wait_poll_for_irq(void) +{ + poll_for_non_spurious_irq(NO_USERSPACE_CMD); +} + +static void wait_sched_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); +} + +static void wait_migrate_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); +} + +/* + * Sleep for usec microseconds by polling in the guest or in + * userspace (e.g. usp_cmd=USERSPACE_SCHED_YIELD). + */ +static void guest_poll(enum arch_timer test_timer, uint64_t usec, + enum sync_cmd usp_cmd) +{ + uint64_t cycles = usec_to_cycles(usec); + /* Whichever timer we are testing with, sleep with the other. */ + enum arch_timer sleep_timer = 1 - test_timer; + uint64_t start = timer_get_cntct(sleep_timer); + + while ((timer_get_cntct(sleep_timer) - start) < cycles) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } +} + +static void sleep_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, NO_USERSPACE_CMD); +} + +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_SCHED_YIELD); +} + +static void sleep_migrate(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); +} + +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +{ + userspace_sleep(usec); +} + +/* + * Reset the timer state to some nice values like the counter not being close + * to the edge, and the control register masked and disabled. 
+ */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. + */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, 
uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. */ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. + * Enable and unmmask it again. + */ + timer_set_ctl(timer, CTL_ENABLE); + + assert_irqs_handled(i); + } + + local_irq_enable(); +} + +static void test_timers_fired_multiple_times(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) + test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); +} + +/* + * Set a timer for tval=delta_1_ms then reprogram it to + * tval=delta_2_ms. Check that we get the timer fired. 
There is no + * timeout for the wait: the wait method (wm) only returns once an IRQ is handled. + */ +static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, + int32_t delta_1_ms, int32_t delta_2_ms) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + /* Program the timer to DEF_CNT + delta_1_ms. */ + set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); + + /* Reprogram the timer to DEF_CNT + delta_2_ms. */ + timer_set_tval(timer, msec_to_cycles(delta_2_ms)); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */ + GUEST_ASSERT(timer_get_cntct(timer) >= + DEF_CNT + msec_to_cycles(delta_2_ms)); + + local_irq_enable(); + assert_irqs_handled(1); +} + +static void test_reprogram_timers(enum arch_timer timer) +{ + int i; + uint64_t base_wait = test_args.wait_ms; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* + * Ensure reprogramming works whether going from a + * longer time to a shorter or vice versa. + */ + test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, + base_wait); + test_reprogramming_timer(timer, irq_wait_method[i], base_wait, + 2 * base_wait); + } +} + +static void test_basic_functionality(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +/* + * This test checks basic timer behavior without actually firing timers, things + * like: the relationship between cval and tval, tval down-counting. 
+ */ +static void timers_sanity_checks(enum arch_timer timer, bool use_sched) +{ + reset_timer_state(timer, DEF_CNT); + + local_irq_disable(); + + /* cval in the past */ + timer_set_cval(timer, + timer_get_cntct(timer) - + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + /* tval in the past */ + timer_set_tval(timer, -1); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); + + /* tval larger than TVAL_MAX. This requires programming with + * timer_set_cval instead so the value is expressible + */ + timer_set_cval(timer, + timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= 0); + + /* + * tval larger than 2 * TVAL_MAX. + * Twice the TVAL_MAX completely loops around the TVAL. + */ + timer_set_cval(timer, + timer_get_cntct(timer) + 2ULL * TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= + msec_to_cycles(test_args.wait_ms)); + + /* negative tval that rollovers from 0. */ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. 
*/ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. 
*/ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counters_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then sleeps for TIMEOUT_NO_IRQ_US and asserts that no IRQ was handled. + */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. 
*/ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. */ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms the past. 
*/ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a time for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; 
i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +/* + * CPUs the test was allowed to run on at startup. Saved by test_run() + * before its first migrate_self() call, which pins the thread to one CPU. + */ +static cpu_set_t default_cpuset; +static bool default_cpuset_saved; + +/* Return the next CPU after the current one in the saved startup mask. */ +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + /* + * Do not query the current affinity here: migrate_self() has already + * narrowed it to a single CPU, so the search would always return the + * CPU we are running on and no migration would ever happen. + */ + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &default_cpuset)); + + return next; +} + +/* Pin the calling thread to new_pcpu. */ +static void migrate_self(uint32_t new_pcpu) +{ + int ret; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + + CPU_ZERO(&cpuset); + CPU_SET(new_pcpu, &cpuset); + + pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); + + ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + + TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", + new_pcpu, ret); +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + migrate_self(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + int ret; + + /* Save the startup affinity once, before the first pinning below. */ + if (!default_cpuset_saved) { + ret = sched_getaffinity(0, sizeof(default_cpuset), + &default_cpuset); + TEST_ASSERT(ret == 0, "Failed to read the CPU affinity; ret: %d", ret); + default_cpuset_saved = true; + } + + /* Start on CPU 0 */ + migrate_self(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + 
return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + vgic_v3_setup(*vm, 1, 64); + sync_global_to_guest(*vm, test_args); +} + +/* Print the usage text; the option letters must match parse_args(). */ +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v] [-w wait_ms]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", 
optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 4abebde78187..d43fb3f49050 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -40,6 +40,18 @@ static struct feature_id_reg feat_id_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 8, 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 + }, + { + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 } }; @@ -468,6 +480,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -475,6 +488,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), 
/* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c new file mode 100644 index 000000000000..943d65fc6b0b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existance, as all we want to see is a failure. 
+ */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! 
*/ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index d20981663831..2a3fe7914b72 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c 
b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index a51dbd2a5f84..f4ac28d53747 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args, KVM_INJECT_MULTI(cmd, first_intid, num); while (irq_handled < num) { - asm volatile("wfi\n" - "msr daifclr, #2\n" - /* handle IRQ */ - "msr daifset, #2\n" - : : : "memory"); + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); } - asm volatile("msr daifclr, #2" : : : "memory"); + local_irq_enable(); GUEST_ASSERT_EQ(irq_handled, num); for (i = first_intid; i < num + first_intid; i++) diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c new file mode 100644 index 000000000000..60cb25454899 --- /dev/null +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/sizes.h> + +#include <kvm_util.h> +#include <processor.h> + +#include "ucall_common.h" + +struct kvm_coalesced_io { + struct kvm_coalesced_mmio_ring *ring; + uint32_t ring_size; + uint64_t mmio_gpa; + uint64_t *mmio; + + /* + * x86-only, but define pio_port for all architectures to minimize the + * amount of #ifdeffery and complexity, without having to sacrifice + * verbose error messages. + */ + uint8_t pio_port; +}; + +static struct kvm_coalesced_io kvm_builtin_io_ring; + +#ifdef __x86_64__ +static const int has_pio = 1; +#else +static const int has_pio = 0; +#endif + +static void guest_code(struct kvm_coalesced_io *io) +{ + int i, j; + + for (;;) { + for (j = 0; j < 1 + has_pio; j++) { + /* + * KVM always leaves one free entry, i.e. exits to + * userspace before the last entry is filled. 
+ */ + for (i = 0; i < io->ring_size - 1; i++) { +#ifdef __x86_64__ + if (i & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } +#ifdef __x86_64__ + if (j & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } + GUEST_SYNC(0); + + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); +#ifdef __x86_64__ + outl(io->pio_port, io->pio_port + i); +#endif + } +} + +static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + const bool want_pio = expected_exit == KVM_EXIT_IO; + struct kvm_coalesced_mmio_ring *ring = io->ring; + struct kvm_run *run = vcpu->run; + uint32_t pio_value; + + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + + vcpu_run(vcpu); + + /* + * Annoyingly, reading PIO data is safe only for PIO exits, otherwise + * data_offset is garbage, e.g. an MMIO gpa. + */ + if (run->exit_reason == KVM_EXIT_IO) + pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + else + pio_value = 0; + + TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && + run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && + *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && + run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && + pio_value == io->pio_port + io->ring_size - 1)), + "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n " + "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n " + "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x", + ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO", + want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, + (want_pio ? 
io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, + run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); +} + +static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + int i; + + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit); + + TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first, + "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u", + ring->first, ring->last, io->ring_size, ring_start); + + for (i = 0; i < io->ring_size - 1; i++) { + uint32_t idx = (ring->first + i) % io->ring_size; + struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; + +#ifdef __x86_64__ + if (i & 1) + TEST_ASSERT(entry->phys_addr == io->pio_port && + entry->len == 4 && entry->pio && + *(uint32_t *)entry->data == io->pio_port + i, + "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", + io->pio_port, io->pio_port + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint32_t *)entry->data); + else +#endif + TEST_ASSERT(entry->phys_addr == io->mmio_gpa && + entry->len == 8 && !entry->pio, + "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", + io->mmio_gpa, io->mmio_gpa + i, i, + entry->len, entry->pio ? 
"PIO" : "MMIO", + entry->phys_addr, *(uint64_t *)entry->data); + } +} + +static void test_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, uint32_t ring_start) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + + kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); +#ifdef __x86_64__ + kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); +#endif + + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO); +#ifdef __x86_64__ + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO); +#endif + + /* + * Verify ucall, which may use non-coalesced MMIO or PIO, generates an + * immediate exit. + */ + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(ring->first, ring_start); + TEST_ASSERT_EQ(ring->last, ring_start); + + /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO); + +#ifdef __x86_64__ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO); +#endif +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO)); + +#ifdef __x86_64__ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO)); +#endif + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + kvm_builtin_io_ring = (struct kvm_coalesced_io) { + /* + * The I/O ring is a kernel-allocated page whose address is + * relative to each vCPU's run page, with the page offset + * provided by KVM in the return of KVM_CAP_COALESCED_MMIO. 
+ */ + .ring = (void *)vcpu->run + + (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()), + + /* + * The size of the I/O ring is fixed, but KVM defines the size + * based on the kernel's PAGE_SIZE. Thus, userspace must query + * the host's page size at runtime to compute the ring size. + */ + .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) / + sizeof(struct kvm_coalesced_mmio), + + /* + * Arbitrary address+port (MMIO mustn't overlap memslots), with + * the MMIO GPA identity mapped in the guest. + */ + .mmio_gpa = 4ull * SZ_1G, + .mmio = (uint64_t *)(4ull * SZ_1G), + .pio_port = 0x80, + }; + + virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + + sync_global_to_guest(vm, kvm_builtin_io_ring); + vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); + + for (i = 0; i < kvm_builtin_io_ring.ring_size; i++) + test_coalesced_io(vcpu, &kvm_builtin_io_ring, i); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 8092c2d0f5d6..bcf582852db9 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) expected_assert_msg, &assert_msg[offset]); } +/* + * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional + * guest asserts can be verified instead of being reported as failures. 
+ */ +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = __vcpu_run(vcpu); + } while (r == -1 && errno == EINTR); + + TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r)); +} + static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, const char *expected_assert) { @@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, struct ucall uc; while (1) { - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", @@ -159,7 +174,7 @@ static void test_limits(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); run = vcpu->run; - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h index b3e97525cb55..bf461de34785 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h @@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) { switch (timer) { case VIRTUAL: @@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) isb(); } +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) { switch (timer) { diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 
9b20a355d81a..de977d131082 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/* Execute a Wait For Interrupt instruction. */ +void wfi(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 63c2aaae51f3..bc7c242480d6 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -428,8 +428,6 @@ const char *vm_guest_mode_string(uint32_t i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); @@ -460,6 +458,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } +static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone); +} + +static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone); +} + static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h 
b/tools/testing/selftests/kvm/include/s390x/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 
* NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 255c9b990f4c..481bd2fd6a32 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -21,6 +21,11 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + /* Is there a portable way to do this? */ static inline void cpu_relax(void) { diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 
2008, 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + * Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include <linux/types.h> + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define 
ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 
+ __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 
*/ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index 0f268b55fa06..51990094effd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -11,6 +11,7 @@ #include <stdint.h> #include "processor.h" +#include "ucall_common.h" #define APIC_DEFAULT_GPA 0xfee00000ULL @@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +{ + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); +} + static inline void x2apic_write_reg(unsigned int reg, uint64_t value) { - wrmsr(APIC_BASE_MSR + (reg >> 4), value); + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); } +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + #endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index fa65b908b13e..6849e2552f1b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -186,6 +186,18 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + 
KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ #define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) @@ -343,4 +355,10 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a0c1440017bb..e247f99e0473 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -25,6 +25,10 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; extern uint64_t guest_tsc_khz; +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + /* Forced emulation prefix, used to invoke the emulator unconditionally. 
*/ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -908,8 +912,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); static inline uint32_t kvm_cpu_fms(void) { @@ -1009,7 +1011,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) } void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 0ac7cc89f38c..fe4dc3693112 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm) sparsebit_set_num(vm->vpages_valid, 0, (1ULL << vm->va_bits) >> vm->page_shift); } + +/* Helper to call wfi instruction. 
*/ +void wfi(void) +{ + asm volatile("wfi"); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 56b170b725b3..a2b7df5f1d39 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -712,16 +712,13 @@ void kvm_vm_release(struct kvm_vm *vmp) } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region, - bool unlink) + struct userspace_mem_region *region) { int ret; - if (unlink) { - rb_erase(&region->gpa_node, &vm->regions.gpa_tree); - rb_erase(&region->hva_node, &vm->regions.hva_tree); - hash_del(&region->slot_node); - } + rb_erase(&region->gpa_node, &vm->regions.gpa_tree); + rb_erase(&region->hva_node, &vm->regions.hva_tree); + hash_del(&region->slot_node); region->region.memory_size = 0; vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); @@ -762,7 +759,7 @@ void kvm_vm_free(struct kvm_vm *vmp) /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) - __vm_mem_region_delete(vmp, region, false); + __vm_mem_region_delete(vmp, region); /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); @@ -794,76 +791,6 @@ int kvm_memfd_alloc(size_t size, bool hugepages) return fd; } -/* - * Memory Compare, host virtual to guest virtual - * - * Input Args: - * hva - Starting host virtual address - * vm - Virtual Machine - * gva - Starting guest virtual address - * len - number of bytes to compare - * - * Output Args: None - * - * Input/Output Args: None - * - * Return: - * Returns 0 if the bytes starting at hva for a length of len - * are equal the guest virtual bytes starting at gva. Returns - * a value < 0, if bytes at hva are less than those at gva. - * Otherwise a value > 0 is returned. - * - * Compares the bytes starting at the host virtual address hva, for - * a length of len, to the guest bytes starting at the guest virtual - * address given by gva. 
- */ -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) -{ - size_t amt; - - /* - * Compare a batch of bytes until either a match is found - * or all the bytes have been compared. - */ - for (uintptr_t offset = 0; offset < len; offset += amt) { - uintptr_t ptr1 = (uintptr_t)hva + offset; - - /* - * Determine host address for guest virtual address - * at offset. - */ - uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - - /* - * Determine amount to compare on this pass. - * Don't allow the comparsion to cross a page boundary. - */ - amt = len - offset; - if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr1 % vm->page_size); - if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr2 % vm->page_size); - - assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); - assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - - /* - * Perform the comparison. If there is a difference - * return that result to the caller, otherwise need - * to continue on looking for a mismatch. - */ - int ret = memcmp((void *)ptr1, (void *)ptr2, amt); - if (ret != 0) - return ret; - } - - /* - * No mismatch found. Let the caller know the two memory - * areas are equal. 
- */ - return 0; -} - static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, struct userspace_mem_region *region) { @@ -1270,7 +1197,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot), true); + __vm_mem_region_delete(vm, memslot2region(vm, slot)); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 4ad4492eea1d..20cfe970e3e3 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); if (vm->pgd_created) @@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) } /* Fill in page table entry */ - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ if (!(entry[idx] & PAGE_INVALID)) fprintf(stderr, "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); @@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) int ri, idx; uint64_t *entry; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); entry = addr_gpa2hva(vm, vm->pgd); @@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ TEST_ASSERT(!(entry[idx] & PAGE_INVALID), "No page mapping for vm virtual address 0x%lx", gva); 
@@ -168,7 +168,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_sregs sregs; struct kvm_vcpu *vcpu; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); stack_vaddr = __vm_vaddr_alloc(vm, stack_size, diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index efb7e7a1354d..15bc8cd583aa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -8,6 +8,73 @@ #include "processor.h" #include "hyperv.h" +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = 
allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, vm_vaddr_t *p_hv_pages_gva) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 153739f2e201..974bcd2df6d7 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -19,8 +19,6 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -#define MAX_NR_CPUID_ENTRIES 100 - vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; @@ -566,10 +564,8 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } static void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -611,7 +607,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + if (get_ucall(vcpu, &uc) == UCALL_ABORT) REPORT_GUEST_ASSERT(uc); } @@ -1195,65 +1191,6 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct 
kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 49f162573126..e3343f0df9e1 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -79,6 +79,7 @@ struct test_params { useconds_t delay; uint64_t nr_iterations; bool partition_vcpu_memory_access; + bool disable_slot_zap_quirk; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -89,6 +90,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); +#ifdef __x86_64__ + if (p->disable_slot_zap_quirk) + vm_enable_cap(vm, 
KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + + pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ? + "disabled" : "enabled"); +#endif pr_info("Finished creating vCPUs\n"); @@ -107,11 +115,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n" " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); guest_modes_help(); printf(" -d: add a delay between each iteration of adding and\n" " deleting a memslot in usec.\n"); + printf(" -q: Disable memslot zap quirk.\n"); printf(" -b: specify the size of the memory region which should be\n" " accessed by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -137,7 +146,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -160,6 +169,12 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; + case 'q': + p.disable_slot_zap_quirk = true; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; case 'h': default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 579a64f97333..893366982f77 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -113,6 +113,7 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +static bool disable_slot_zap_quirk; static bool verbose; #define pr_info_v(...) 
\ @@ -578,6 +579,9 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; + if (disable_slot_zap_quirk) + vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { @@ -896,6 +900,7 @@ static void help(char *name, struct test_args *targs) pr_info(" -h: print this help screen.\n"); pr_info(" -v: enable verbose mode (not for benchmarking).\n"); pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", targs->nslots); pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", @@ -954,7 +959,7 @@ static bool parse_args(int argc, char *argv[], uint32_t max_mem_slots; int opt; - while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { switch (opt) { case 'h': default: @@ -966,6 +971,11 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; + case 'q': + disable_slot_zap_quirk = true; + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index b39033844756..e32dd59703a0 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -17,16 +17,17 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" #define MAIN_PAGE_COUNT 512 #define TEST_DATA_PAGE_COUNT 512 #define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN 4096 +#define TEST_DATA_START_GFN PAGE_SIZE #define TEST_DATA_TWO_PAGE_COUNT 256 #define 
TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN 8192 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; @@ -66,7 +67,7 @@ static void guest_dirty_test_data(void) " lghi 5,%[page_count]\n" /* r5 += r1 */ "2: agfr 5,1\n" - /* r2 = r1 << 12 */ + /* r2 = r1 << PAGE_SHIFT */ "1: sllg 2,1,12(0)\n" /* essa(r4, r2, SET_STABLE) */ " .insn rrf,0xb9ab0000,4,2,1,0\n" diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c index 84313fb27529..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390x/debug_test.c @@ -2,12 +2,12 @@ /* Test KVM debugging features. 
*/ #include "kvm_util.h" #include "test_util.h" +#include "sie.h" #include <linux/kvm.h> #define __LC_SVC_NEW_PSW 0x1c0 #define __LC_PGM_NEW_PSW 0x1d0 -#define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 #define PGM_SPECIFICATION 0x06 @@ -85,7 +85,7 @@ static void test_step_pgm_diag(void) vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, __LC_PGM_NEW_PSW, new_psw); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index f2df7416be84..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -16,6 +16,7 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" enum mop_target { LOGICAL, @@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CHECK_N_DO(f, ...) 
({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index 7a742a673b7c..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -9,9 +9,8 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -151,7 +150,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) * instead. * In order to skip these tests we detect this inside the guest */ - skip = tests[*i].addr < (void *)4096 && + skip = tests[*i].addr < (void *)PAGE_SIZE && tests[*i].expected != TRANSL_UNAVAIL && !mapped_0; if (!skip) { diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c new file mode 100644 index 000000000000..f257beec1430 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 
2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ +#include "debug_print.h" +#include "kselftest_harness.h" +#include "kvm_util.h" +#include "processor.h" +#include "sie.h" + +#include <linux/capability.h> +#include <linux/sizes.h> + +#define VM_MEM_SIZE (4 * SZ_1M) + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). + */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, 
&info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, + PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); + ASSERT_NE(self->run, MAP_FAILED); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. + */ + self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + ASSERT_NE(self->sie_block, MAP_FAILED); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= 
~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + munmap(self->sie_block, PAGE_SIZE); + munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (unsigned long)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* verify SIEIC exit + * * fail on 
codes not expected in the test cases + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return false; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_run *run = self->run; + + switch (run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; 
i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, &regs)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bb8002084f52..a8267628e9ed 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -175,7 +175,7 @@ static void guest_code_move_memory_region(void) GUEST_DONE(); } -static void test_move_memory_region(void) +static void test_move_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -184,6 +184,9 @@ static void test_move_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); /* @@ -266,7 +269,7 @@ static void
guest_code_delete_memory_region(void) GUEST_ASSERT(0); } -static void test_delete_memory_region(void) +static void test_delete_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -276,6 +279,9 @@ static void test_delete_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); wait_for_vcpu(); @@ -553,7 +559,10 @@ int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; + int j, disable_slot_zap_quirk = 0; + if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL) + disable_slot_zap_quirk = 1; /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. @@ -579,13 +588,17 @@ int main(int argc, char *argv[]) else loops = 10; - pr_info("Testing MOVE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_move_memory_region(); + for (j = 0; j <= disable_slot_zap_quirk; j++) { + pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_move_memory_region(!!j); - pr_info("Testing DELETE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_delete_memory_region(); + pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? 
"disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_delete_memory_region(!!j); + } #endif return 0; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index f6b295e0b2d2..76cc2df9238a 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -47,15 +47,18 @@ static void guest_code(void) /* * Single step test, covers 2 basic instructions and 2 emulated * - * Enable interrupts during the single stepping to see that - * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. */ asm volatile("ss_start: " "sti\n\t" "xor %%eax,%%eax\n\t" "cpuid\n\t" - "movl $0x1a0,%%ecx\n\t" - "rdmsr\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" "cli\n\t" : : : "eax", "ebx", "ecx", "edx"); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index e192720bfe14..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index b987a3d79715..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ 
b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -157,7 +157,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 7c70c0da4fb7..2e9197eb1652 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -160,6 +160,36 @@ static void test_sev(void *guest_code, uint64_t policy) kvm_vm_free(vm); } +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_es_shutdown(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + uint32_t type = KVM_X86_SEV_ES_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, SEV_POLICY_ES, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); @@ -171,6 +201,8 @@ int main(int argc, char *argv[]) test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + test_sev_es_shutdown(); + if (kvm_has_cap(KVM_CAP_XCRS) && (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { test_sync_vmsa(0); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 618cd2442390..88bcca188799 100644 --- 
a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -13,6 +13,7 @@ struct xapic_vcpu { struct kvm_vcpu *vcpu; bool is_x2apic; + bool has_xavic_errata; }; static void xapic_guest_code(void) @@ -31,6 +32,10 @@ static void xapic_guest_code(void) } } +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + static void x2apic_guest_code(void) { asm volatile("cli"); @@ -41,7 +46,12 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - x2apic_write_reg(APIC_ICR, val); + if (val & X2APIC_RSVD_BITS_MASK) { + x2apic_write_reg_fault(APIC_ICR, val); + } else { + x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } GUEST_SYNC(val); } while (1); } @@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { - val &= (-1u | (0xffull << (32 + 24))); - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); - } else { - TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { + return; } -} -#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ - GENMASK_ULL(17,16) | \ - GENMASK_ULL(13,13)) + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - if (x->is_x2apic) { - /* Hardware writing vICR register requires reserved bits 31:20, - * 17:16 and 13 kept as zero to avoid #GP exception. Data value - * written to vICR should mask out those bits above. 
- */ - val &= ~X2APIC_RSVED_BITS_MASK; - } - ____test_icr(x, val | APIC_ICR_BUSY); + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it is as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. + */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } @@ -231,6 +242,15 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); x.is_x2apic = false; + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drops writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index e149d0574961..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -10,6 +10,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 |