diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-19 10:38:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-08-19 10:38:36 -0700 |
commit | e61cf2e3a5b452cfefcb145021f5a8ea88735cc1 (patch) | |
tree | bbabaf0d4753d6880ecbaddd8daa0164d49c1c61 /tools | |
parent | 1009aa1205c2c5e9101437dcadfa195708d863bf (diff) | |
parent | 28a1f3ac1d0c8558ee4453d9634dad891a6e922e (diff) | |
download | lwn-e61cf2e3a5b452cfefcb145021f5a8ea88735cc1.tar.gz lwn-e61cf2e3a5b452cfefcb145021f5a8ea88735cc1.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first set of KVM updates from Paolo Bonzini:
"PPC:
- minor code cleanups
x86:
- PCID emulation and CR3 caching for shadow page tables
- nested VMX live migration
- nested VMCS shadowing
- optimized IPI hypercall
- some optimizations
ARM will come next week"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
KVM: X86: Implement PV IPIs in linux guest
KVM: X86: Add kvm hypervisor init time platform setup callback
KVM: X86: Implement "send IPI" hypercall
KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
KVM: x86: Skip pae_root shadow allocation if tdp enabled
KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
KVM: vmx: move struct host_state usage to struct loaded_vmcs
KVM: vmx: compute need to reload FS/GS/LDT on demand
KVM: nVMX: remove a misleading comment regarding vmcs02 fields
KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
KVM: vmx: add dedicated utility to access guest's kernel_gs_base
KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
KVM: vmx: refactor segmentation code in vmx_save_host_state()
kvm: nVMX: Fix fault priority for VMX operations
kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/kvm/.gitignore | 2 |
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 2 |
-rw-r--r-- | tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 129 |
-rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util.h | 4 |
-rw-r--r-- | tools/testing/selftests/kvm/include/vmx.h | 66 |
-rw-r--r-- | tools/testing/selftests/kvm/include/x86.h | 8 |
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 94 |
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util_internal.h | 7 |
-rw-r--r-- | tools/testing/selftests/kvm/lib/vmx.c | 104 |
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86.c | 256 |
-rw-r--r-- | tools/testing/selftests/kvm/state_test.c | 218 |
-rw-r--r-- | tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | 69 |
12 files changed, 801 insertions, 158 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 63fc1ab9248f..4202139d81d9 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,3 +1,5 @@ +cr4_cpuid_sync_test set_sregs_test sync_regs_test vmx_tsc_adjust_test +state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index d9d00319b07c..dd0e5163f01f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -9,6 +9,8 @@ LIBKVM_x86_64 = lib/x86.c lib/vmx.c TEST_GEN_PROGS_x86_64 = set_sregs_test TEST_GEN_PROGS_x86_64 += sync_regs_test TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += state_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c new file mode 100644 index 000000000000..8346b33c2073 --- /dev/null +++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CR4 and CPUID sync test + * + * Copyright 2018, Red Hat, Inc. and/or its affiliates. 
+ * + * Author: + * Wei Huang <wei@redhat.com> + */ + +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "x86.h" + +#define X86_FEATURE_XSAVE (1<<26) +#define X86_FEATURE_OSXSAVE (1<<27) +#define VCPU_ID 1 + +enum { + GUEST_UPDATE_CR4 = 0x1000, + GUEST_FAILED, + GUEST_DONE, +}; + +static void exit_to_hv(uint16_t port) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port) + : "rax"); +} + +static inline bool cr4_cpuid_is_sync(void) +{ + int func, subfunc; + uint32_t eax, ebx, ecx, edx; + uint64_t cr4; + + func = 0x1; + subfunc = 0x0; + __asm__ __volatile__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(func), "c"(subfunc)); + + cr4 = get_cr4(); + + return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE)); +} + +static void guest_code(void) +{ + uint64_t cr4; + + /* turn on CR4.OSXSAVE */ + cr4 = get_cr4(); + cr4 |= X86_CR4_OSXSAVE; + set_cr4(cr4); + + /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ + if (!cr4_cpuid_is_sync()) + exit_to_hv(GUEST_FAILED); + + /* notify hypervisor to change CR4 */ + exit_to_hv(GUEST_UPDATE_CR4); + + /* check again */ + if (!cr4_cpuid_is_sync()) + exit_to_hv(GUEST_FAILED); + + exit_to_hv(GUEST_DONE); +} + +int main(int argc, char *argv[]) +{ + struct kvm_run *run; + struct kvm_vm *vm; + struct kvm_sregs sregs; + struct kvm_cpuid_entry2 *entry; + int rc; + + entry = kvm_get_supported_cpuid_entry(1); + if (!(entry->ecx & X86_FEATURE_XSAVE)) { + printf("XSAVE feature not supported, skipping test\n"); + return 0; + } + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + while (1) { + rc = _vcpu_run(vm, VCPU_ID); + + if (run->exit_reason == KVM_EXIT_IO) { + switch (run->io.port) { + case GUEST_UPDATE_CR4: + /* 
emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + break; + case GUEST_FAILED: + TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + break; + case GUEST_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", + run->io.port); + } + } + } + + kvm_vm_free(vm); + +done: + return 0; +} diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 637b7017b6ee..d32632f71ab8 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -53,6 +53,8 @@ int kvm_check_cap(long cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp, int perm); +void kvm_vm_release(struct kvm_vm *vmp); int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); @@ -75,7 +77,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, uint32_t data_memslot, uint32_t pgd_memslot); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h index 6ed8499807fd..b9ffe1024d3a 100644 --- a/tools/testing/selftests/kvm/include/vmx.h +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -380,6 +380,30 @@ static inline int vmptrld(uint64_t vmcs_pa) return ret; } +static inline int vmptrst(uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; 
+ + __asm__ __volatile__("vmptrst %[value]; setna %[ret]" + : [value]"=m"(tmp), [ret]"=rm"(ret) + : : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmptrst that ignores errors and returns zero if the + * vmptrst instruction fails. + */ +static inline uint64_t vmptrstz(void) +{ + uint64_t value = 0; + vmptrst(&value); + return value; +} + /* * No guest state (e.g. GPRs) is established by this vmlaunch. */ @@ -444,6 +468,15 @@ static inline int vmresume(void) return ret; } +static inline void vmcall(void) +{ + /* Currently, L1 destroys our GPRs during vmexits. */ + __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : : + "rax", "rbx", "rcx", "rdx", + "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15"); +} + static inline int vmread(uint64_t encoding, uint64_t *value) { uint64_t tmp; @@ -486,9 +519,34 @@ static inline uint32_t vmcs_revision(void) return rdmsr(MSR_IA32_VMX_BASIC); } -void prepare_for_vmx_operation(void); -void prepare_vmcs(void *guest_rip, void *guest_rsp); -struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, - vmx_guest_code_t guest_code); +struct vmx_pages { + void *vmxon_hva; + uint64_t vmxon_gpa; + void *vmxon; + + void *vmcs_hva; + uint64_t vmcs_gpa; + void *vmcs; + + void *msr_hva; + uint64_t msr_gpa; + void *msr; + + void *shadow_vmcs_hva; + uint64_t shadow_vmcs_gpa; + void *shadow_vmcs; + + void *vmread_hva; + uint64_t vmread_gpa; + void *vmread; + + void *vmwrite_hva; + uint64_t vmwrite_gpa; + void *vmwrite; +}; + +struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); +bool prepare_for_vmx_operation(struct vmx_pages *vmx); +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); #endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h index 4a5b2c4c1a0f..42c3596815b8 100644 --- a/tools/testing/selftests/kvm/include/x86.h +++ b/tools/testing/selftests/kvm/include/x86.h @@ 
-59,8 +59,8 @@ enum x86_register { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, type:5, dpl:2, p:1; - unsigned limit1:4, zero0:3, g:1, base2:8; + unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; } __attribute__((packed)); @@ -303,6 +303,10 @@ static inline unsigned long get_xmm(int n) return 0; } +struct kvm_x86_state; +struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 37e2a787d2fc..643309d6de74 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -62,6 +62,18 @@ int kvm_check_cap(long cap) return ret; } +static void vm_open(struct kvm_vm *vm, int perm) +{ + vm->kvm_fd = open(KVM_DEV_PATH, perm); + if (vm->kvm_fd < 0) + exit(KSFT_SKIP); + + /* Create VM. */ + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL); + TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " + "rc: %i errno: %i", vm->fd, errno); +} + /* VM Create * * Input Args: @@ -90,16 +102,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) TEST_ASSERT(vm != NULL, "Insufficent Memory"); vm->mode = mode; - kvm_fd = open(KVM_DEV_PATH, perm); - if (kvm_fd < 0) - exit(KSFT_SKIP); - - /* Create VM. */ - vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); - TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " - "rc: %i errno: %i", vm->fd, errno); - - close(kvm_fd); + vm_open(vm, perm); /* Setup mode specific traits. 
*/ switch (vm->mode) { @@ -132,6 +135,39 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } +/* VM Restart + * + * Input Args: + * vm - VM that has been released before + * perm - permission + * + * Output Args: None + * + * Reopens the file descriptors associated to the VM and reinstates the + * global state, such as the irqchip and the memory regions that are mapped + * into the guest. + */ +void kvm_vm_restart(struct kvm_vm *vmp, int perm) +{ + struct userspace_mem_region *region; + + vm_open(vmp, perm); + if (vmp->has_irqchip) + vm_create_irqchip(vmp); + + for (region = vmp->userspace_mem_region_head; region; + region = region->next) { + int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i\n" + " slot: %u flags: 0x%x\n" + " guest_phys_addr: 0x%lx size: 0x%lx", + ret, errno, region->region.slot, region->region.flags, + region->region.guest_phys_addr, + region->region.memory_size); + } +} + /* Userspace Memory Region Find * * Input Args: @@ -238,8 +274,12 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; - int ret = close(vcpu->fd); + ret = munmap(vcpu->state, sizeof(*vcpu->state)); + TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " + "errno: %i", ret, errno); + close(vcpu->fd); TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " "errno: %i", ret, errno); @@ -252,6 +292,23 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) free(vcpu); } +void kvm_vm_release(struct kvm_vm *vmp) +{ + int ret; + + /* Free VCPUs. */ + while (vmp->vcpu_head) + vm_vcpu_rm(vmp, vmp->vcpu_head->id); + + /* Close file descriptor for the VM. 
*/ + ret = close(vmp->fd); + TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" + " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); + + close(vmp->kvm_fd); + TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n" + " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); +} /* Destroys and frees the VM pointed to by vmp. */ @@ -282,18 +339,11 @@ void kvm_vm_free(struct kvm_vm *vmp) free(region); } - /* Free VCPUs. */ - while (vmp->vcpu_head) - vm_vcpu_rm(vmp, vmp->vcpu_head->id); - /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); sparsebit_free(&vmp->vpages_mapped); - /* Close file descriptor for the VM. */ - ret = close(vmp->fd); - TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" - " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); + kvm_vm_release(vmp); /* Free the structure describing the VM. */ free(vmp); @@ -701,7 +751,7 @@ static int vcpu_mmap_sz(void) * Creates and adds to the VM specified by vm and virtual CPU with * the ID given by vcpuid. */ -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot) { struct vcpu *vcpu; @@ -736,7 +786,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) vcpu->next = vm->vcpu_head; vm->vcpu_head = vcpu; - vcpu_setup(vm, vcpuid); + vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot); } /* VM Virtual Address Unused Gap @@ -957,6 +1007,8 @@ void vm_create_irqchip(struct kvm_vm *vm) ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " "rc: %i errno: %i", ret, errno); + + vm->has_irqchip = true; } /* VM VCPU State diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index a0bd1980c81c..542ed606b338 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -43,6 +43,7 @@ struct vcpu { struct kvm_vm { int mode; + int kvm_fd; int fd; 
unsigned int page_size; unsigned int page_shift; @@ -51,13 +52,17 @@ struct kvm_vm { struct userspace_mem_region *userspace_mem_region_head; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; + + bool has_irqchip; bool pgd_created; vm_paddr_t pgd; + vm_vaddr_t gdt; + vm_vaddr_t tss; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_setup(struct kvm_vm *vm, int vcpuid); +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot); void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c index 0231bc0aae7b..b987c3c970eb 100644 --- a/tools/testing/selftests/kvm/lib/vmx.c +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -13,47 +13,60 @@ #include "x86.h" #include "vmx.h" -/* Create a default VM for VMX tests. +/* Allocate memory regions for nested VMX tests. * * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * guest_code - The vCPU's entry point + * vm - The VM to allocate guest-virtual addresses in. * - * Output Args: None + * Output Args: + * p_vmx_gva - The guest virtual address for the struct vmx_pages. * * Return: - * Pointer to opaque structure that describes the created VM. + * Pointer to structure with the addresses of the VMX areas. 
*/ -struct kvm_vm * -vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +struct vmx_pages * +vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) { - struct kvm_cpuid2 *cpuid; - struct kvm_vm *vm; - vm_vaddr_t vmxon_vaddr; - vm_paddr_t vmxon_paddr; - vm_vaddr_t vmcs_vaddr; - vm_paddr_t vmcs_paddr; - - vm = vm_create_default(vcpuid, (void *) guest_code); - - /* Enable nesting in CPUID */ - vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva); /* Setup of a region of guest memory for the vmxon region. */ - vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); - vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon); + vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon); /* Setup of a region of guest memory for a vmcs. */ - vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); - vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs); + vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs); + + /* Setup of a region of guest memory for the MSR bitmap. */ + vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr); + vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr); + memset(vmx->msr_hva, 0, getpagesize()); - vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, - vmcs_paddr); + /* Setup of a region of guest memory for the shadow VMCS. 
*/ + vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs); + vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs); - return vm; + /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */ + vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread); + vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread); + memset(vmx->vmread_hva, 0, getpagesize()); + + vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite); + vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); + memset(vmx->vmwrite_hva, 0, getpagesize()); + + *p_vmx_gva = vmx_gva; + return vmx; } -void prepare_for_vmx_operation(void) +bool prepare_for_vmx_operation(struct vmx_pages *vmx) { uint64_t feature_control; uint64_t required; @@ -88,18 +101,42 @@ void prepare_for_vmx_operation(void) feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); if ((feature_control & required) != required) wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); + + /* Enter VMX root operation. */ + *(uint32_t *)(vmx->vmxon) = vmcs_revision(); + if (vmxon(vmx->vmxon_gpa)) + return false; + + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + + return true; } /* * Initialize the control fields to the most basic settings possible. 
*/ -static inline void init_vmcs_control_fields(void) +static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); - vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); - vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); + else + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -113,12 +150,15 @@ static inline void init_vmcs_control_fields(void) vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); vmwrite(TPR_THRESHOLD, 0); - vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); vmwrite(CR0_GUEST_HOST_MASK, 0); vmwrite(CR4_GUEST_HOST_MASK, 0); vmwrite(CR0_READ_SHADOW, get_cr0()); vmwrite(CR4_READ_SHADOW, get_cr4()); + + vmwrite(MSR_BITMAP, vmx->msr_gpa); + vmwrite(VMREAD_BITMAP, vmx->vmread_gpa); + vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa); } /* @@ -235,9 +275,9 @@ static inline void init_vmcs_guest_state(void *rip, void *rsp) vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); } -void prepare_vmcs(void *guest_rip, void *guest_rsp) +void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) { - init_vmcs_control_fields(); + init_vmcs_control_fields(vmx); init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c index 2f17675f4275..e38345252df5 100644 --- a/tools/testing/selftests/kvm/lib/x86.c +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -239,25 +239,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) 
vm_paddr_t paddr = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); vm->pgd = paddr; - - /* Set pointer to pgd tables in all the VCPUs that - * have already been created. Future VCPUs will have - * the value set as each one is created. - */ - for (struct vcpu *vcpu = vm->vcpu_head; vcpu; - vcpu = vcpu->next) { - struct kvm_sregs sregs; - - /* Obtain the current system register settings */ - vcpu_sregs_get(vm, vcpu->id, &sregs); - - /* Set and store the pointer to the start of the - * pgd tables. - */ - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vm, vcpu->id, &sregs); - } - vm->pgd_created = true; } } @@ -460,9 +441,32 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp) segp->unusable = true; } +static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) +{ + void *gdt = addr_gva2hva(vm, vm->gdt); + struct desc64 *desc = gdt + (segp->selector >> 3) * 8; + + desc->limit0 = segp->limit & 0xFFFF; + desc->base0 = segp->base & 0xFFFF; + desc->base1 = segp->base >> 16; + desc->s = segp->s; + desc->type = segp->type; + desc->dpl = segp->dpl; + desc->p = segp->present; + desc->limit1 = segp->limit >> 16; + desc->l = segp->l; + desc->db = segp->db; + desc->g = segp->g; + desc->base2 = segp->base >> 24; + if (!segp->s) + desc->base3 = segp->base >> 32; +} + + /* Set Long Mode Flat Kernel Code Segment * * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp * selector - selector value * * Output Args: @@ -473,7 +477,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp) * Sets up the KVM segment pointed to by segp, to be a code segment * with the selector value given by selector. 
*/ -static void kvm_seg_set_kernel_code_64bit(uint16_t selector, +static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); @@ -486,11 +490,14 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector, segp->g = true; segp->l = true; segp->present = 1; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); } /* Set Long Mode Flat Kernel Data Segment * * Input Args: + * vm - VM whose GDT is being filled, or NULL to only write segp * selector - selector value * * Output Args: @@ -501,7 +508,7 @@ static void kvm_seg_set_kernel_code_64bit(uint16_t selector, * Sets up the KVM segment pointed to by segp, to be a data segment * with the selector value given by selector. */ -static void kvm_seg_set_kernel_data_64bit(uint16_t selector, +static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); @@ -513,6 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(uint16_t selector, */ segp->g = true; segp->present = true; + if (vm) + kvm_seg_fill_gdt_64bit(vm, segp); } /* Address Guest Virtual to Guest Physical @@ -575,13 +584,45 @@ unmapped_gva: "gva: 0x%lx", gva); } -void vcpu_setup(struct kvm_vm *vm, int vcpuid) +static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->gdt) + vm->gdt = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + dt->base = vm->gdt; + dt->limit = getpagesize(); +} + +static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, + int selector, int gdt_memslot, + int pgd_memslot) +{ + if (!vm->tss) + vm->tss = vm_vaddr_alloc(vm, getpagesize(), + KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot); + + memset(segp, 0, sizeof(*segp)); + segp->base = vm->tss; + segp->limit = 0x67; + segp->selector = selector; + segp->type = 0xb; + segp->present = 1; + kvm_seg_fill_gdt_64bit(vm, segp); +} + +void 
vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) { struct kvm_sregs sregs; /* Set mode specific system register values. */ vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.limit = 0; + + kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); + switch (vm->mode) { case VM_MODE_FLAT48PG: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; @@ -589,30 +630,18 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid) sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); } - vcpu_sregs_set(vm, vcpuid, &sregs); - /* If virtual translation table have been setup, set system register - * to point to the tables. It's okay if they haven't been setup yet, - * in that the code that sets up the virtual translation tables, will - * go back through any VCPUs that have already been created and set - * their values. 
- */ - if (vm->pgd_created) { - struct kvm_sregs sregs; - - vcpu_sregs_get(vm, vcpuid, &sregs); - - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vm, vcpuid, &sregs); - } + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpuid, &sregs); } /* Adds a vCPU with reasonable defaults (i.e., a stack) * @@ -629,7 +658,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); /* Create VCPU */ - vm_vcpu_add(vm, vcpuid); + vm_vcpu_add(vm, vcpuid, 0, 0); /* Setup guest general purpose registers */ vcpu_regs_get(vm, vcpuid, &regs); @@ -698,3 +727,148 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code) return vm; } + +struct kvm_x86_state { + struct kvm_vcpu_events events; + struct kvm_mp_state mp_state; + struct kvm_regs regs; + struct kvm_xsave xsave; + struct kvm_xcrs xcrs; + struct kvm_sregs sregs; + struct kvm_debugregs debugregs; + union { + struct kvm_nested_state nested; + char nested_[16384]; + }; + struct kvm_msrs msrs; +}; + +static int kvm_get_num_msrs(struct kvm_vm *vm) +{ + struct kvm_msr_list nmsrs; + int r; + + nmsrs.nmsrs = 0; + r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", + r); + + return nmsrs.nmsrs; +} + +struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct kvm_msr_list *list; + struct kvm_x86_state *state; + int nmsrs, r, i; + static int nested_size = -1; + + if (nested_size == -1) { + nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE); + TEST_ASSERT(nested_size <= sizeof(state->nested_), + "Nested state size too big, %i > %zi", + nested_size, sizeof(state->nested_)); + } + + nmsrs = kvm_get_num_msrs(vm); + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + TEST_ASSERT(r == 0, "Unexpected result from 
KVM_GET_MSR_INDEX_LIST, r: %i", + r); + + state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); + r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); + + if (nested_size) { + state->nested.size = sizeof(state->nested_); + r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", + r); + TEST_ASSERT(state->nested.size <= nested_size, + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); + } else + state->nested.size = 0; + + state->msrs.nmsrs = nmsrs; + for (i = 0; i < nmsrs; i++) + state->msrs.entries[i].index = list->indices[i]; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + r, r == nmsrs ? 
-1 : list->indices[r]); + + r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); + + free(list); + return state; +} + +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int r; + + if (state->nested.size) { + r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", + r); + } + + r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + + r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); + + r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); +} diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c new file mode 100644 index 000000000000..ecabf25b7077 --- /dev/null +++ b/tools/testing/selftests/kvm/state_test.c @@ -0,0 +1,218 @@ +/* + * KVM_GET/SET_* tests + * + * Copyright (C) 2018, Red Hat, Inc. 
+ * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Tests for vCPU state save/restore, including nested guest state. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#define VCPU_ID 5 +#define PORT_SYNC 0x1000 +#define PORT_ABORT 0x1001 +#define PORT_DONE 0x1002 + +static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg0), "S"(arg1) + : "rax"); +} + +#define exit_to_l0(_port, _arg0, _arg1) \ + __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, __LINE__);\ +} while (0) + +#define GUEST_SYNC(stage) \ + exit_to_l0(PORT_SYNC, "hello", stage); + +static bool have_nested_state; + +void l2_guest_code(void) +{ + GUEST_SYNC(5); + + /* Exit to L1 */ + vmcall(); + + /* L1 has now set up a shadow VMCS for us. */ + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_SYNC(9); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); + GUEST_SYNC(10); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); + GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); + GUEST_SYNC(11); + + /* Done, exit to L1 and never come back. 
*/ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + GUEST_SYNC(3); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + /* Check that the launched state is preserved. */ + GUEST_ASSERT(vmlaunch()); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_SYNC(6); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); + + vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmlaunch()); + GUEST_SYNC(7); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + + vmwrite(GUEST_RIP, 0xc0ffee); + GUEST_SYNC(8); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); + + GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); + GUEST_SYNC(12); + GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(vmresume()); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + GUEST_SYNC(1); + GUEST_SYNC(2); + + if (vmx_pages) + 
l1_guest_code(vmx_pages); + + exit_to_l0(PORT_DONE, 0, 0); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs1, regs2; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + int stage; + + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + vcpu_regs_get(vm, VCPU_ID, &regs1); + + if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + } else { + printf("will skip nested state checks\n"); + vcpu_args_set(vm, VCPU_ID, 1, 0); + } + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(&regs1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, &regs1); + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi, + __FILE__, regs1.rsi); + /* NOT REACHED */ + case PORT_SYNC: + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + + /* PORT_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") && + regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong) regs1.rsi); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + + /* Restore state in a new VM. 
*/ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + + memset(&regs2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, &regs2); + TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c index d7cb7944a42e..fc414c284368 100644 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -46,11 +46,6 @@ enum { PORT_DONE, }; -struct vmx_page { - vm_vaddr_t virt; - vm_paddr_t phys; -}; - enum { VMXON_PAGE = 0, VMCS_PAGE, @@ -67,9 +62,6 @@ struct kvm_single_msr { /* The virtual machine object. */ static struct kvm_vm *vm; -/* Array of vmx_page descriptors that is shared with the guest. */ -struct vmx_page *vmx_pages; - #define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) static void do_exit_to_l0(uint16_t port, unsigned long arg) { @@ -105,7 +97,7 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_page *vmx_pages) +static void l1_guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -116,23 +108,14 @@ static void l1_guest_code(struct vmx_page *vmx_pages) wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - prepare_for_vmx_operation(); - - /* Enter VMX root operation. */ - *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); - GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); - - /* Load a VMCS. 
*/ - *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); - GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); - GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); /* Jump into L2. First, test failure to load guest CR3. */ @@ -152,33 +135,6 @@ static void l1_guest_code(struct vmx_page *vmx_pages) exit_to_l0(PORT_DONE, 0); } -static void allocate_vmx_page(struct vmx_page *page) -{ - vm_vaddr_t virt; - - virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); - memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); - - page->virt = virt; - page->phys = addr_gva2gpa(vm, virt); -} - -static vm_vaddr_t allocate_vmx_pages(void) -{ - vm_vaddr_t vmx_pages_vaddr; - int i; - - vmx_pages_vaddr = vm_vaddr_alloc( - vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); - - vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); - - for (i = 0; i < NUM_VMX_PAGES; i++) - allocate_vmx_page(&vmx_pages[i]); - - return vmx_pages_vaddr; -} - void report(int64_t val) { printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", @@ -187,7 +143,8 @@ void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_vaddr; + struct vmx_pages *vmx_pages; + vm_vaddr_t vmx_pages_gva; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); if (!(entry->ecx & CPUID_VMX)) { @@ -195,23 +152,23 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } - vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + vm = vm_create_default(VCPU_ID, (void *) l1_guest_code); + 
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vmx_pages_vaddr = allocate_vmx_pages(); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct kvm_regs regs; vcpu_run(vm, VCPU_ID); + vcpu_regs_get(vm, VCPU_ID, &regs); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + "Got exit_reason other than KVM_EXIT_IO: %u (%s), rip=%lx\n", run->exit_reason, - exit_reason_str(run->exit_reason)); - - vcpu_regs_get(vm, VCPU_ID, &regs); + exit_reason_str(run->exit_reason), regs.rip); switch (run->io.port) { case PORT_ABORT: |