diff options
Diffstat (limited to 'tools/testing/selftests/kvm/x86/xapic_ipi_test.c')
-rw-r--r-- | tools/testing/selftests/kvm/x86/xapic_ipi_test.c | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/tools/testing/selftests/kvm/x86/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c new file mode 100644 index 000000000000..35cb9de54a82 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * xapic_ipi_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake + * another vCPU that is halted when KVM's backing page for the APIC access + * address has been moved by mm. + * + * The test starts two vCPUs: one that sends IPIs and one that continually + * executes HLT. The sender checks that the halter has woken from the HLT and + * has reentered HLT before sending the next IPI. While the vCPUs are running, + * the host continually calls migrate_pages to move all of the process' pages + * amongst the available numa nodes on the machine. + * + * Migration is a command line option. When used on non-numa machines will + * exit with error. Test is still usefull on non-numa for testing IPIs. + */ +#include <getopt.h> +#include <pthread.h> +#include <inttypes.h> +#include <string.h> +#include <time.h> + +#include "kvm_util.h" +#include "numaif.h" +#include "processor.h" +#include "test_util.h" +#include "vmx.h" + +/* Default running time for the test */ +#define DEFAULT_RUN_SECS 3 + +/* Default delay between migrate_pages calls (microseconds) */ +#define DEFAULT_DELAY_USECS 500000 + +/* + * Vector for IPI from sender vCPU to halting vCPU. + * Value is arbitrary and was chosen for the alternating bit pattern. Any + * value should work. + */ +#define IPI_VECTOR 0xa5 + +/* + * Incremented in the IPI handler. Provides evidence to the sender that the IPI + * arrived at the destination + */ +static volatile uint64_t ipis_rcvd; + +/* Data struct shared between host main thread and vCPUs */ +struct test_data_page { + uint32_t halter_apic_id; + volatile uint64_t hlt_count; + volatile uint64_t wake_count; + uint64_t ipis_sent; + uint64_t migrations_attempted; + uint64_t migrations_completed; + uint32_t icr; + uint32_t icr2; + uint32_t halter_tpr; + uint32_t halter_ppr; + + /* + * Record local version register as a cross-check that APIC access + * worked. Value should match what KVM reports (APIC_VERSION in + * arch/x86/kvm/lapic.c). If test is failing, check that values match + * to determine whether APIC access exits are working. + */ + uint32_t halter_lvr; +}; + +struct thread_params { + struct test_data_page *data; + struct kvm_vcpu *vcpu; + uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ +}; + +void verify_apic_base_addr(void) +{ + uint64_t msr = rdmsr(MSR_IA32_APICBASE); + uint64_t base = GET_APIC_BASE(msr); + + GUEST_ASSERT(base == APIC_DEFAULT_GPA); +} + +static void halter_guest_code(struct test_data_page *data) +{ + verify_apic_base_addr(); + xapic_enable(); + + data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)); + data->halter_lvr = xapic_read_reg(APIC_LVR); + + /* + * Loop forever HLTing and recording halts & wakes. Disable interrupts + * each time around to minimize window between signaling the pending + * halt to the sender vCPU and executing the halt. No need to disable on + * first run as this vCPU executes first and the host waits for it to + * signal going into first halt before starting the sender vCPU. Record + * TPR and PPR for diagnostic purposes in case the test fails. + */ + for (;;) { + data->halter_tpr = xapic_read_reg(APIC_TASKPRI); + data->halter_ppr = xapic_read_reg(APIC_PROCPRI); + data->hlt_count++; + safe_halt(); + cli(); + data->wake_count++; + } +} + +/* + * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to + * enable diagnosing errant writes to the APIC access address backing page in + * case of test failure. + */ +static void guest_ipi_handler(struct ex_regs *regs) +{ + ipis_rcvd++; + xapic_write_reg(APIC_EOI, 77); +} + +static void sender_guest_code(struct test_data_page *data) +{ + uint64_t last_wake_count; + uint64_t last_hlt_count; + uint64_t last_ipis_rcvd_count; + uint32_t icr_val; + uint32_t icr2_val; + uint64_t tsc_start; + + verify_apic_base_addr(); + xapic_enable(); + + /* + * Init interrupt command register for sending IPIs + * + * Delivery mode=fixed, per SDM: + * "Delivers the interrupt specified in the vector field to the target + * processor." + * + * Destination mode=physical i.e. specify target by its local APIC + * ID. This vCPU assumes that the halter vCPU has already started and + * set data->halter_apic_id. + */ + icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR); + icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id); + data->icr = icr_val; + data->icr2 = icr2_val; + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + for (;;) { + /* + * Send IPI to halter vCPU. + * First IPI can be sent unconditionally because halter vCPU + * starts earlier. + */ + xapic_write_reg(APIC_ICR2, icr2_val); + xapic_write_reg(APIC_ICR, icr_val); + data->ipis_sent++; + + /* + * Wait up to ~1 sec for halter to indicate that it has: + * 1. Received the IPI + * 2. Woken up from the halt + * 3. Gone back into halt + * Current CPUs typically run at 2.x Ghz which is ~2 + * billion ticks per second. + */ + tsc_start = rdtsc(); + while (rdtsc() - tsc_start < 2000000000) { + if ((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)) + break; + } + + GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) && + (data->wake_count != last_wake_count) && + (data->hlt_count != last_hlt_count)); + + last_wake_count = data->wake_count; + last_hlt_count = data->hlt_count; + last_ipis_rcvd_count = ipis_rcvd; + } +} + +static void *vcpu_thread(void *arg) +{ + struct thread_params *params = (struct thread_params *)arg; + struct kvm_vcpu *vcpu = params->vcpu; + struct ucall uc; + int old; + int r; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(r == 0, + "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", + vcpu->id, r); + + fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + if (get_ucall(vcpu, &uc) == UCALL_ABORT) { + TEST_ASSERT(false, + "vCPU %u exited with error: %s.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu", + vcpu->id, (const char *)uc.args[0], + params->data->ipis_sent, params->data->hlt_count, + params->data->wake_count, + *params->pipis_rcvd, params->data->halter_tpr, + params->data->halter_ppr, params->data->halter_lvr, + params->data->migrations_attempted, + params->data->migrations_completed); + } + + return NULL; +} + +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) +{ + void *retval; + int r; + + r = pthread_cancel(thread); + TEST_ASSERT(r == 0, + "pthread_cancel on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + + r = pthread_join(thread, &retval); + TEST_ASSERT(r == 0, + "pthread_join on vcpu_id=%d failed with errno=%d", + vcpu->id, r); + TEST_ASSERT(retval == PTHREAD_CANCELED, + "expected retval=%p, got %p", PTHREAD_CANCELED, + retval); +} + +void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, + uint64_t *pipis_rcvd) +{ + long pages_not_moved; + unsigned long nodemask = 0; + unsigned long nodemasks[sizeof(nodemask) * 8]; + int nodes = 0; + time_t start_time, last_update, now; + time_t interval_secs = 1; + int i, r; + int from, to; + unsigned long bit; + uint64_t hlt_count; + uint64_t wake_count; + uint64_t ipis_sent; + + fprintf(stderr, "Calling migrate_pages every %d microseconds\n", + delay_usecs); + + /* Get set of first 64 numa nodes available */ + r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, + 0, MPOL_F_MEMS_ALLOWED); + TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno); + + fprintf(stderr, "Numa nodes found amongst first %lu possible nodes " + "(each 1-bit indicates node is present): %#lx\n", + sizeof(nodemask) * 8, nodemask); + + /* Init array of masks containing a single-bit in each, one for each + * available node. migrate_pages called below requires specifying nodes + * as bit masks. + */ + for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) { + if (nodemask & bit) { + nodemasks[nodes] = nodemask & bit; + nodes++; + } + } + + TEST_ASSERT(nodes > 1, + "Did not find at least 2 numa nodes. Can't do migration"); + + fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); + + from = 0; + to = 1; + start_time = time(NULL); + last_update = start_time; + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + + while ((int)(time(NULL) - start_time) < run_secs) { + data->migrations_attempted++; + + /* + * migrate_pages with PID=0 will migrate all pages of this + * process between the nodes specified as bitmasks. The page + * backing the APIC access address belongs to this process + * because it is allocated by KVM in the context of the + * KVM_CREATE_VCPU ioctl. If that assumption ever changes this + * test may break or give a false positive signal. + */ + pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]), + &nodemasks[from], + &nodemasks[to]); + if (pages_not_moved < 0) + fprintf(stderr, + "migrate_pages failed, errno=%d\n", errno); + else if (pages_not_moved > 0) + fprintf(stderr, + "migrate_pages could not move %ld pages\n", + pages_not_moved); + else + data->migrations_completed++; + + from = to; + to++; + if (to == nodes) + to = 0; + + now = time(NULL); + if (((now - start_time) % interval_secs == 0) && + (now != last_update)) { + last_update = now; + fprintf(stderr, + "%lu seconds: Migrations attempted=%lu completed=%lu, " + "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n", + now - start_time, data->migrations_attempted, + data->migrations_completed, + data->ipis_sent, *pipis_rcvd, + data->hlt_count, data->wake_count); + + TEST_ASSERT(ipis_sent != data->ipis_sent && + hlt_count != data->hlt_count && + wake_count != data->wake_count, + "IPI, HLT and wake count have not increased " + "in the last %lu seconds. " + "HLTer is likely hung.", interval_secs); + + ipis_sent = data->ipis_sent; + hlt_count = data->hlt_count; + wake_count = data->wake_count; + } + usleep(delay_usecs); + } +} + +void get_cmdline_args(int argc, char *argv[], int *run_secs, + bool *migrate, int *delay_usecs) +{ + for (;;) { + int opt = getopt(argc, argv, "s:d:m"); + + if (opt == -1) + break; + switch (opt) { + case 's': + *run_secs = parse_size(optarg); + break; + case 'm': + *migrate = true; + break; + case 'd': + *delay_usecs = parse_size(optarg); + break; + default: + TEST_ASSERT(false, + "Usage: -s <runtime seconds>. Default is %d seconds.\n" + "-m adds calls to migrate_pages while vCPUs are running." + " Default is no migrations.\n" + "-d <delay microseconds> - delay between migrate_pages() calls." + " Default is %d microseconds.", + DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); + } + } +} + +int main(int argc, char *argv[]) +{ + int r; + int wait_secs; + const int max_halter_wait = 10; + int run_secs = 0; + int delay_usecs = 0; + struct test_data_page *data; + vm_vaddr_t test_data_page_vaddr; + bool migrate = false; + pthread_t threads[2]; + struct thread_params params[2]; + struct kvm_vm *vm; + uint64_t *pipis_rcvd; + + get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs); + if (run_secs <= 0) + run_secs = DEFAULT_RUN_SECS; + if (delay_usecs <= 0) + delay_usecs = DEFAULT_DELAY_USECS; + + vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); + + vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); + + test_data_page_vaddr = vm_vaddr_alloc_page(vm); + data = addr_gva2hva(vm, test_data_page_vaddr); + memset(data, 0, sizeof(*data)); + params[0].data = data; + params[1].data = data; + + vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); + vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); + + pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); + params[0].pipis_rcvd = pipis_rcvd; + params[1].pipis_rcvd = pipis_rcvd; + + /* Start halter vCPU thread and wait for it to execute first HLT. */ + r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); + TEST_ASSERT(r == 0, + "pthread_create halter failed errno=%d", errno); + fprintf(stderr, "Halter vCPU thread started\n"); + + wait_secs = 0; + while ((wait_secs < max_halter_wait) && !data->hlt_count) { + sleep(1); + wait_secs++; + } + + TEST_ASSERT(data->hlt_count, + "Halter vCPU did not execute first HLT within %d seconds", + max_halter_wait); + + fprintf(stderr, + "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", + data->halter_apic_id, wait_secs); + + r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); + TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); + + fprintf(stderr, + "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n", + run_secs); + + if (!migrate) + sleep(run_secs); + else + do_migrations(data, run_secs, delay_usecs, pipis_rcvd); + + /* + * Cancel threads and wait for them to stop. + */ + cancel_join_vcpu_thread(threads[0], params[0].vcpu); + cancel_join_vcpu_thread(threads[1], params[1].vcpu); + + /* + * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT, + * then the number of HLT exits may be less than the number of HLTs + * that were executed, as Idle HLT elides the exit if the vCPU has an + * unmasked, pending IRQ (or NMI). + */ + if (this_cpu_has(X86_FEATURE_IDLE_HLT)) + TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits), + "HLT insns = %lu, HLT exits = %lu", + data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + else + TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits)); + + fprintf(stderr, + "Test successful after running for %d seconds.\n" + "Sending vCPU sent %lu IPIs to halting vCPU\n" + "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" + "Halter APIC ID=%#x\n" + "Sender ICR value=%#x ICR2 value=%#x\n" + "Halter TPR=%#x PPR=%#x LVR=%#x\n" + "Migrations attempted: %lu\n" + "Migrations completed: %lu\n", + run_secs, data->ipis_sent, + data->hlt_count, data->wake_count, *pipis_rcvd, + data->halter_apic_id, + data->icr, data->icr2, + data->halter_tpr, data->halter_ppr, data->halter_lvr, + data->migrations_attempted, data->migrations_completed); + + kvm_vm_free(vm); + + return 0; +} |