author     Linus Torvalds <torvalds@linux-foundation.org>  2019-06-20 13:50:37 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-06-20 13:50:37 -0700
commit     b3e978337b25b042aa653652a029e3d798814c12
tree       f03c4b0b6d9eadb4cef3daa8c1f9920415cdcefe
parent     e929387449cf631e96840296a01922be1ef3c832
parent     b21e31b253048b7f9768ca7cc270e67765fd6ba2
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"Fixes for ARM and x86, plus selftest patches and nicer structs for
nested state save/restore"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: nVMX: reorganize initial steps of vmx_set_nested_state
KVM: arm/arm64: Fix emulated ptimer irq injection
tests: kvm: Check for a kernel warning
kvm: tests: Sort tests in the Makefile alphabetically
KVM: x86/mmu: Allocate PAE root array when using SVM's 32-bit NPT
KVM: x86: Modify struct kvm_nested_state to have explicit fields for data
KVM: fix typo in documentation
KVM: nVMX: use correct clean fields when copying from eVMCS
KVM: arm/arm64: vgic: Fix kvm_device leak in vgic_its_destroy
KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST
KVM: arm64: Implement vq_present() as a macro
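The headline change in this pull is the reworked kvm_nested_state layout: the fixed-size header now lives in an explicit 'hdr' union and the variable-size payload in a 'data' union (see the uapi diff below). As a rough userspace sketch of the save/restore flow the new layout serves, illustrative only and not code from this series (the file descriptors and error handling are assumed):

        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        /* Save a vCPU's nested virtualization state, e.g. on the migration
         * source.  KVM_CHECK_EXTENSION(KVM_CAP_NESTED_STATE) reports the
         * maximum total size, header included. */
        static struct kvm_nested_state *save_nested_state(int vm_fd, int vcpu_fd)
        {
                int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NESTED_STATE);
                struct kvm_nested_state *state;

                if (max <= 0)
                        return NULL;            /* nested state not supported */

                state = calloc(1, max);
                state->size = max;              /* room we are offering to KVM */
                if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
                        free(state);
                        return NULL;
                }
                return state;                   /* state->size is the actual size */
        }

        /* Load it back on the destination vCPU. */
        static int restore_nested_state(int vcpu_fd, struct kvm_nested_state *state)
        {
                return ioctl(vcpu_fd, KVM_SET_NESTED_STATE, state);
        }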
Diffstat:
-rw-r--r--  Documentation/virtual/kvm/api.txt                              |  48
-rw-r--r--  arch/arm64/kvm/guest.c                                         |  65
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h                                |  33
-rw-r--r--  arch/x86/kvm/mmu.c                                             |  16
-rw-r--r--  arch/x86/kvm/vmx/nested.c                                      | 103
-rw-r--r--  arch/x86/kvm/vmx/vmcs12.h                                      |   5
-rw-r--r--  tools/arch/x86/include/uapi/asm/kvm.h                          |   2
-rw-r--r--  tools/testing/selftests/kvm/.gitignore                         |   1
-rw-r--r--  tools/testing/selftests/kvm/Makefile                           |  21
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h                 |   2
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h         |   2
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c                     |  36
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c             |  16
-rw-r--r--  tools/testing/selftests/kvm/x86_64/mmio_warning_test.c         | 126
-rw-r--r--  tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c |  68
-rw-r--r--  virt/kvm/arm/arch_timer.c                                      |   5
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c                                   |   1

17 files changed, 405 insertions, 145 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..2a4531bb06bd 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1079,7 +1079,7 @@ yet and must be cleared on entry.
 
 4.35 KVM_SET_USER_MEMORY_REGION
 
-Capability: KVM_CAP_USER_MEM
+Capability: KVM_CAP_USER_MEMORY
 Architectures: all
 Type: vm ioctl
 Parameters: struct kvm_userspace_memory_region (in)
@@ -3857,43 +3857,59 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in/out)
 Returns: 0 on success, -1 on error
 Errors:
-  E2BIG:     the total state size (including the fixed-size part of struct
-             kvm_nested_state) exceeds the value of 'size' specified by
+  E2BIG:     the total state size exceeds the value of 'size' specified by
              the user; the size required will be written into size.
 
 struct kvm_nested_state {
        __u16 flags;
        __u16 format;
        __u32 size;
+
        union {
-               struct kvm_vmx_nested_state vmx;
-               struct kvm_svm_nested_state svm;
+               struct kvm_vmx_nested_state_hdr vmx;
+               struct kvm_svm_nested_state_hdr svm;
+
+               /* Pad the header to 128 bytes. */
                __u8 pad[120];
-       };
-       __u8 data[0];
+       } hdr;
+
+       union {
+               struct kvm_vmx_nested_state_data vmx[0];
+               struct kvm_svm_nested_state_data svm[0];
+       } data;
 };
 
 #define KVM_STATE_NESTED_GUEST_MODE    0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING   0x00000002
+#define KVM_STATE_NESTED_EVMCS         0x00000004
 
-#define KVM_STATE_NESTED_SMM_GUEST_MODE        0x00000001
-#define KVM_STATE_NESTED_SMM_VMXON     0x00000002
+#define KVM_STATE_NESTED_FORMAT_VMX    0
+#define KVM_STATE_NESTED_FORMAT_SVM    1
 
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+
+#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE    0x00000001
+#define KVM_STATE_NESTED_VMX_SMM_VMXON         0x00000002
+
+struct kvm_vmx_nested_state_hdr {
        __u64 vmxon_pa;
-       __u64 vmcs_pa;
+       __u64 vmcs12_pa;
 
        struct {
                __u16 flags;
        } smm;
 };
 
+struct kvm_vmx_nested_state_data {
+       __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+       __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
 This ioctl copies the vcpu's nested virtualization state from the kernel to
 userspace.
 
-The maximum size of the state, including the fixed-size part of struct
-kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
-the KVM_CHECK_EXTENSION ioctl().
+The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
+to the KVM_CHECK_EXTENSION ioctl().
 
 4.115 KVM_SET_NESTED_STATE
 
@@ -3903,8 +3919,8 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in)
 Returns: 0 on success, -1 on error
 
-This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For
-the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
+For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
 
 4.116 KVM_(UN)REGISTER_COALESCED_MMIO
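The E2BIG contract documented above also permits sizing without the capability check: offer a too-small buffer, let KVM write the required total back into 'size', and retry. A hedged sketch of that pattern (illustrative; 'vcpu_fd' is assumed):

        #include <errno.h>
        #include <stdlib.h>
        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static struct kvm_nested_state *get_nested_state_retry(int vcpu_fd)
        {
                struct kvm_nested_state probe = { .size = sizeof(probe) };
                struct kvm_nested_state *state;

                if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, &probe) == 0) {
                        /* Everything fit in the bare 128-byte header. */
                        state = calloc(1, sizeof(probe));
                        memcpy(state, &probe, sizeof(probe));
                        return state;
                }
                if (errno != E2BIG)
                        return NULL;

                /* Per the documentation above, KVM wrote the required
                 * total size into probe.size on E2BIG. */
                state = calloc(1, probe.size);
                state->size = probe.size;
                if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
                        free(state);
                        return NULL;
                }
                return state;
        }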
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3ae2f82fca46..c8aa00179363 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -70,10 +70,8 @@ static u64 core_reg_offset_from_id(u64 id)
        return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
 }
 
-static int validate_core_offset(const struct kvm_vcpu *vcpu,
-                               const struct kvm_one_reg *reg)
+static int core_reg_size_from_offset(const struct kvm_vcpu *vcpu, u64 off)
 {
-       u64 off = core_reg_offset_from_id(reg->id);
        int size;
 
        switch (off) {
@@ -103,8 +101,7 @@ static int validate_core_offset(const struct kvm_vcpu *vcpu,
                return -EINVAL;
        }
 
-       if (KVM_REG_SIZE(reg->id) != size ||
-           !IS_ALIGNED(off, size / sizeof(__u32)))
+       if (!IS_ALIGNED(off, size / sizeof(__u32)))
                return -EINVAL;
 
        /*
@@ -115,6 +112,21 @@ static int validate_core_offset(const struct kvm_vcpu *vcpu,
        if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(off))
                return -EINVAL;
 
+       return size;
+}
+
+static int validate_core_offset(const struct kvm_vcpu *vcpu,
+                               const struct kvm_one_reg *reg)
+{
+       u64 off = core_reg_offset_from_id(reg->id);
+       int size = core_reg_size_from_offset(vcpu, off);
+
+       if (size < 0)
+               return -EINVAL;
+
+       if (KVM_REG_SIZE(reg->id) != size)
+               return -EINVAL;
+
        return 0;
 }
 
@@ -207,13 +219,7 @@ out:
 
 #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
 #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
-
-static bool vq_present(
-       const u64 (*const vqs)[KVM_ARM64_SVE_VLS_WORDS],
-       unsigned int vq)
-{
-       return (*vqs)[vq_word(vq)] & vq_mask(vq);
-}
+#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
 
 static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
@@ -258,7 +264,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
        max_vq = 0;
        for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; ++vq)
-               if (vq_present(&vqs, vq))
+               if (vq_present(vqs, vq))
                        max_vq = vq;
 
        if (max_vq > sve_vq_from_vl(kvm_sve_max_vl))
@@ -272,7 +278,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
         * maximum:
         */
        for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
-               if (vq_present(&vqs, vq) != sve_vq_available(vq))
+               if (vq_present(vqs, vq) != sve_vq_available(vq))
                        return -EINVAL;
 
        /* Can't run with no vector lengths at all: */
@@ -453,19 +459,34 @@ static int copy_core_reg_indices(const struct kvm_vcpu *vcpu,
 {
        unsigned int i;
        int n = 0;
-       const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
 
        for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
-               /*
-                * The KVM_REG_ARM64_SVE regs must be used instead of
-                * KVM_REG_ARM_CORE for accessing the FPSIMD V-registers on
-                * SVE-enabled vcpus:
-                */
-               if (vcpu_has_sve(vcpu) && core_reg_offset_is_vreg(i))
+               u64 reg = KVM_REG_ARM64 | KVM_REG_ARM_CORE | i;
+               int size = core_reg_size_from_offset(vcpu, i);
+
+               if (size < 0)
+                       continue;
+
+               switch (size) {
+               case sizeof(__u32):
+                       reg |= KVM_REG_SIZE_U32;
+                       break;
+
+               case sizeof(__u64):
+                       reg |= KVM_REG_SIZE_U64;
+                       break;
+
+               case sizeof(__uint128_t):
+                       reg |= KVM_REG_SIZE_U128;
+                       break;
+
+               default:
+                       WARN_ON(1);
                        continue;
+               }
 
                if (uindices) {
-                       if (put_user(core_reg | i, uindices))
+                       if (put_user(reg, uindices))
                                return -EFAULT;
                        uindices++;
                }
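With the guest.c fix above, KVM_GET_REG_LIST reports each core register ID with its true size (U32, U64, or U128) instead of unconditionally claiming U64, so userspace can decode sizes straight from the returned IDs. A sketch, assuming an arm64 build where <linux/kvm.h> pulls in the KVM_REG_SIZE() and KVM_REG_ARM_CORE definitions, and 'n_regs' obtained from a prior KVM_GET_REG_LIST probe:

        #include <stdio.h>
        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static void dump_core_reg_sizes(int vcpu_fd, __u64 n_regs)
        {
                struct kvm_reg_list *list;

                list = calloc(1, sizeof(*list) + n_regs * sizeof(__u64));
                list->n = n_regs;
                if (ioctl(vcpu_fd, KVM_GET_REG_LIST, list) < 0)
                        return;

                for (__u64 i = 0; i < list->n; i++) {
                        __u64 id = list->reg[i];

                        if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE)
                                continue;
                        /* KVM_REG_SIZE() expands the size field to bytes:
                         * 4, 8 or, after this fix, 16 for the V-registers. */
                        printf("core reg %#llx: %zu bytes\n",
                               (unsigned long long)id, (size_t)KVM_REG_SIZE(id));
                }
                free(list);
        }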
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 7a0e64ccd6ff..d6ab5b4d15e5 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -383,6 +383,9 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE  (1 << 2)
 #define KVM_X86_QUIRK_OUT_7E_INC_RIP   (1 << 3)
 
+#define KVM_STATE_NESTED_FORMAT_VMX    0
+#define KVM_STATE_NESTED_FORMAT_SVM    1       /* unused */
+
 #define KVM_STATE_NESTED_GUEST_MODE    0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING   0x00000002
 #define KVM_STATE_NESTED_EVMCS         0x00000004
@@ -390,9 +393,16 @@ struct kvm_sync_regs {
 #define KVM_STATE_NESTED_SMM_GUEST_MODE        0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON     0x00000002
 
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+
+struct kvm_vmx_nested_state_data {
+       __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+       __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
+struct kvm_vmx_nested_state_hdr {
        __u64 vmxon_pa;
-       __u64 vmcs_pa;
+       __u64 vmcs12_pa;
 
        struct {
                __u16 flags;
@@ -401,24 +411,25 @@ struct kvm_vmx_nested_state {
 
 /* for KVM_CAP_NESTED_STATE */
 struct kvm_nested_state {
-       /* KVM_STATE_* flags */
        __u16 flags;
-
-       /* 0 for VMX, 1 for SVM.  */
        __u16 format;
-
-       /* 128 for SVM, 128 + VMCS size for VMX. */
        __u32 size;
 
        union {
-               /* VMXON, VMCS */
-               struct kvm_vmx_nested_state vmx;
+               struct kvm_vmx_nested_state_hdr vmx;
 
                /* Pad the header to 128 bytes. */
                __u8 pad[120];
-       };
+       } hdr;
 
-       __u8 data[0];
+       /*
+        * Define data region as 0 bytes to preserve backwards-compatability
+        * to old definition of kvm_nested_state in order to avoid changing
+        * KVM_{GET,PUT}_NESTED_STATE ioctl values.
+        */
+       union {
+               struct kvm_vmx_nested_state_data vmx[0];
+       } data;
 };
 
 #endif /* _ASM_X86_KVM_H */
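The zero-length 'data' region keeps both sizeof(struct kvm_nested_state) and the payload offset identical to the old '__u8 data[0]' layout, which matters because the KVM_{GET,SET}_NESTED_STATE ioctl numbers encode the struct size. A compile-time sketch of the invariants the comment above relies on (the 128-byte figure comes from the padded header: 8 bytes of flags/format/size plus the 120-byte union):

        #include <stddef.h>
        #include <linux/kvm.h>

        /* The header stays 128 bytes, and the variable-size data region
         * starts right after it, exactly as before the rework. */
        _Static_assert(sizeof(struct kvm_nested_state) == 128,
                       "header size is part of the ioctl ABI");
        _Static_assert(offsetof(struct kvm_nested_state, data) == 128,
                       "payload offset unchanged from the old __u8 data[0]");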
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1e9ba81accba..d3c3d5e5ffd4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5602,14 +5602,18 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
        struct page *page;
        int i;
 
-       if (tdp_enabled)
-               return 0;
-
        /*
-        * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-        * Therefore we need to allocate shadow page tables in the first
-        * 4GB of memory, which happens to fit the DMA32 zone.
+        * When using PAE paging, the four PDPTEs are treated as 'root' pages,
+        * while the PDP table is a per-vCPU construct that's allocated at MMU
+        * creation.  When emulating 32-bit mode, cr3 is only 32 bits even on
+        * x86_64.  Therefore we need to allocate the PDP table in the first
+        * 4GB of memory, which happens to fit the DMA32 zone.  Except for
+        * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can
+        * skip allocating the PDP table.
         */
+       if (tdp_enabled && kvm_x86_ops->get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
+               return 0;
+
        page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
        if (!page)
                return -ENOMEM;
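Distilled, the new gate skips the DMA32 page only when TDP is active with a root deeper than PAE's three levels; SVM's 32-bit NPT (root level 3 or less) keeps the allocation. A standalone restatement of the condition (PT32E_ROOT_LEVEL is 3 in the kernel; the helper name here is ours, for illustration):

        #include <stdbool.h>

        #define PT32E_ROOT_LEVEL 3      /* PAE paging: 3-level roots */

        /* Mirrors the check added above: the PAE PDP table is needed unless
         * TDP paging is in use with a root above PAE's 3 levels. */
        static bool needs_pae_root(bool tdp_enabled, int tdp_level)
        {
                return !(tdp_enabled && tdp_level > PT32E_ROOT_LEVEL);
        }

        /* needs_pae_root(true, 4) == false: 64-bit EPT/NPT, nothing needed.
         * needs_pae_root(true, 3) == true:  SVM's 32-bit NPT, allocate it. */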
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1032f068f0b9..5f9c1a200201 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1397,7 +1397,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
        }
 
        if (unlikely(!(evmcs->hv_clean_fields &
-                      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+                      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
                vmcs12->exception_bitmap = evmcs->exception_bitmap;
        }
 
@@ -1437,7 +1437,7 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
        }
 
        if (unlikely(!(evmcs->hv_clean_fields &
-                      HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+                      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
                vmcs12->pin_based_vm_exec_control = evmcs->pin_based_vm_exec_control;
                vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
@@ -5226,14 +5226,16 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
        struct vmcs12 *vmcs12;
        struct kvm_nested_state kvm_state = {
                .flags = 0,
-               .format = 0,
+               .format = KVM_STATE_NESTED_FORMAT_VMX,
                .size = sizeof(kvm_state),
-               .vmx.vmxon_pa = -1ull,
-               .vmx.vmcs_pa = -1ull,
+               .hdr.vmx.vmxon_pa = -1ull,
+               .hdr.vmx.vmcs12_pa = -1ull,
        };
+       struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
+               &user_kvm_nested_state->data.vmx[0];
 
        if (!vcpu)
-               return kvm_state.size + 2 * VMCS12_SIZE;
+               return kvm_state.size + sizeof(*user_vmx_nested_state);
 
        vmx = to_vmx(vcpu);
        vmcs12 = get_vmcs12(vcpu);
@@ -5243,23 +5245,23 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 
        if (nested_vmx_allowed(vcpu) &&
            (vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
-               kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
-               kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr;
+               kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
+               kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
 
                if (vmx_has_valid_vmcs12(vcpu)) {
-                       kvm_state.size += VMCS12_SIZE;
+                       kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
 
                        if (is_guest_mode(vcpu) &&
                            nested_cpu_has_shadow_vmcs(vmcs12) &&
                            vmcs12->vmcs_link_pointer != -1ull)
-                               kvm_state.size += VMCS12_SIZE;
+                               kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
                }
 
                if (vmx->nested.smm.vmxon)
-                       kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
+                       kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
 
                if (vmx->nested.smm.guest_mode)
-                       kvm_state.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
+                       kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
 
                if (is_guest_mode(vcpu)) {
                        kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
@@ -5294,16 +5296,19 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
                        copy_shadow_to_vmcs12(vmx);
        }
 
+       BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
+       BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
+
        /*
         * Copy over the full allocated size of vmcs12 rather than just the size
         * of the struct.
         */
-       if (copy_to_user(user_kvm_nested_state->data, vmcs12, VMCS12_SIZE))
+       if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
                return -EFAULT;
 
        if (nested_cpu_has_shadow_vmcs(vmcs12) &&
            vmcs12->vmcs_link_pointer != -1ull) {
-               if (copy_to_user(user_kvm_nested_state->data + VMCS12_SIZE,
+               if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
                                 get_shadow_vmcs12(vcpu), VMCS12_SIZE))
                        return -EFAULT;
        }
@@ -5331,33 +5336,35 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12;
        u32 exit_qual;
+       struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
+               &user_kvm_nested_state->data.vmx[0];
        int ret;
 
-       if (kvm_state->format != 0)
+       if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
                return -EINVAL;
 
-       if (!nested_vmx_allowed(vcpu))
-               return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL;
-
-       if (kvm_state->vmx.vmxon_pa == -1ull) {
-               if (kvm_state->vmx.smm.flags)
+       if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
+               if (kvm_state->hdr.vmx.smm.flags)
                        return -EINVAL;
 
-               if (kvm_state->vmx.vmcs_pa != -1ull)
+               if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
                        return -EINVAL;
 
-               vmx_leave_nested(vcpu);
-               return 0;
-       }
+               if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
+                       return -EINVAL;
+       } else {
+               if (!nested_vmx_allowed(vcpu))
+                       return -EINVAL;
 
-       if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa))
-               return -EINVAL;
+               if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
+                       return -EINVAL;
+       }
 
-       if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+       if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
            (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
                return -EINVAL;
 
-       if (kvm_state->vmx.smm.flags &
+       if (kvm_state->hdr.vmx.smm.flags &
            ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
                return -EINVAL;
@@ -5366,21 +5373,25 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
         * nor can VMLAUNCH/VMRESUME be pending.  Outside SMM, SMM flags
         * must be zero.
         */
-       if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
+       if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags)
                return -EINVAL;
 
-       if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
-           !(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
+       if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
+           !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
                return -EINVAL;
 
        vmx_leave_nested(vcpu);
-       if (kvm_state->vmx.vmxon_pa == -1ull)
-               return 0;
+       if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
+               if (!nested_vmx_allowed(vcpu))
+                       return -EINVAL;
 
-       if (kvm_state->flags & KVM_STATE_NESTED_EVMCS)
                nested_enable_evmcs(vcpu, NULL);
+       }
+
+       if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
+               return 0;
 
-       vmx->nested.vmxon_ptr = kvm_state->vmx.vmxon_pa;
+       vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
        ret = enter_vmx_operation(vcpu);
        if (ret)
                return ret;
@@ -5389,12 +5400,12 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
        if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
                return 0;
 
-       if (kvm_state->vmx.vmcs_pa != -1ull) {
-               if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa ||
-                   !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa))
+       if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
+               if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
+                   !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
                        return -EINVAL;
 
-               set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa);
+               set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
        } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
                /*
                 * Sync eVMCS upon entry as we may not have
@@ -5405,16 +5416,16 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                        return -EINVAL;
        }
 
-       if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
+       if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
                vmx->nested.smm.vmxon = true;
                vmx->nested.vmxon = false;
 
-               if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
+               if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
                        vmx->nested.smm.guest_mode = true;
        }
 
        vmcs12 = get_vmcs12(vcpu);
-       if (copy_from_user(vmcs12, user_kvm_nested_state->data, sizeof(*vmcs12)))
+       if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
                return -EFAULT;
 
        if (vmcs12->hdr.revision_id != VMCS12_REVISION)
@@ -5431,12 +5442,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
            vmcs12->vmcs_link_pointer != -1ull) {
                struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
 
-               if (kvm_state->size < sizeof(*kvm_state) + VMCS12_SIZE + sizeof(*vmcs12))
+               if (kvm_state->size <
+                   sizeof(*kvm_state) +
+                   sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
                        goto error_guest_mode;
 
                if (copy_from_user(shadow_vmcs12,
-                                  user_kvm_nested_state->data + VMCS12_SIZE,
-                                  sizeof(*vmcs12))) {
+                                  user_vmx_nested_state->shadow_vmcs12,
+                                  sizeof(*shadow_vmcs12))) {
                        ret = -EFAULT;
                        goto error_guest_mode;
                }
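One consequence of the reorganized validation above: setting vmxon_pa to -1ull (with vmcs12_pa also -1ull and no SMM flags) exits early and simply leaves nested operation, and KVM_STATE_NESTED_EVMCS is the only flag that may accompany it. A hedged sketch of that "forget all nested state" call, assuming a 'vcpu_fd':

        #include <string.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        static int clear_nested_state(int vcpu_fd)
        {
                struct kvm_nested_state state;

                memset(&state, 0, sizeof(state));
                state.format = KVM_STATE_NESTED_FORMAT_VMX;
                state.size = sizeof(state);
                state.hdr.vmx.vmxon_pa = -1ull;
                state.hdr.vmx.vmcs12_pa = -1ull;
                /* state.flags stays zero; per the code above, only
                 * KVM_STATE_NESTED_EVMCS would be tolerated here. */
                return ioctl(vcpu_fd, KVM_SET_NESTED_STATE, &state);
        }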
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 3a742428ad17..337718fc8a36 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -201,9 +201,10 @@ struct __packed vmcs12 {
 /*
  * VMCS12_SIZE is the number of bytes L1 should allocate for the VMXON region
  * and any VMCS region.  Although only sizeof(struct vmcs12) are used by the
- * current implementation, 4K are reserved to avoid future complications.
+ * current implementation, 4K are reserved to avoid future complications and
+ * to preserve userspace ABI.
  */
-#define VMCS12_SIZE 0x1000
+#define VMCS12_SIZE    KVM_STATE_NESTED_VMX_VMCS_SIZE
 
 /*
  * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 7a0e64ccd6ff..24a8cd229df6 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -392,7 +392,7 @@ struct kvm_sync_regs {
 
 struct kvm_vmx_nested_state {
        __u64 vmxon_pa;
-       __u64 vmcs_pa;
+       __u64 vmcs12_pa;
 
        struct {
                __u16 flags;
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index df1bf9230a74..41266af0d3dc 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -2,6 +2,7 @@
 /x86_64/evmcs_test
 /x86_64/hyperv_cpuid
 /x86_64/kvm_create_max_vcpus
+/x86_64/mmio_warning_test
 /x86_64/platform_info_test
 /x86_64/set_sregs_test
 /x86_64/smm_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 41280dc06297..62afd0b43074 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -11,23 +11,24 @@ LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebi
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
 LIBKVM_aarch64 = lib/aarch64/processor.c
 
-TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/smm_test
+TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
 
-TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a5a4b28f14d8..bd8eb5579028 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -139,6 +139,8 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
                                 void *guest_code);
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
 struct kvm_userspace_memory_region *
 kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
                                 uint64_t end);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 6063d5b2f356..af4d26de32d1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -303,6 +303,8 @@ static inline unsigned long get_xmm(int n)
        return 0;
 }
 
+bool is_intel_cpu(void);
+
 struct kvm_x86_state;
 struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 633b22df46a4..267f2353e4ab 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1583,3 +1583,39 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
        return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
 }
+
+/*
+ * Is Unrestricted Guest
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
+ *
+ * Check if the unrestricted guest flag is enabled.
+ */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+       char val = 'N';
+       size_t count;
+       FILE *f;
+
+       if (vm == NULL) {
+               /* Ensure that the KVM vendor-specific module is loaded. */
+               f = fopen(KVM_DEV_PATH, "r");
+               TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
+                           errno);
+               fclose(f);
+       }
+
+       f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
+       if (f) {
+               count = fread(&val, sizeof(char), 1, f);
+               TEST_ASSERT(count == 1, "Unable to read from param file.");
+               fclose(f);
+       }
+
+       return val == 'Y';
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 21f3040d90cb..2fe78bdf3bee 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1137,3 +1137,19 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
                            r);
        }
 }
+
+bool is_intel_cpu(void)
+{
+       int eax, ebx, ecx, edx;
+       const uint32_t *chunk;
+       const int leaf = 0;
+
+       __asm__ __volatile__(
+               "cpuid"
+               : /* output */ "=a"(eax), "=b"(ebx),
+                 "=c"(ecx), "=d"(edx)
+               : /* input */ "0"(leaf), "2"(0));
+
+       chunk = (const uint32_t *)("GenuineIntel");
+       return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
new file mode 100644
index 000000000000..00bb97d76000
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -0,0 +1,126 @@
+/*
+ * mmio_warning_test
+ *
+ * Copyright (C) 2019, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that we don't get a kernel warning when we call KVM_RUN after a
+ * triple fault occurs.  To get the triple fault to occur we call KVM_RUN
+ * on a VCPU that hasn't been properly setup.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <kvm_util.h>
+#include <linux/kvm.h>
+#include <processor.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <test_util.h>
+#include <unistd.h>
+
+#define NTHREAD 4
+#define NPROCESS 5
+
+struct thread_context {
+       int kvmcpu;
+       struct kvm_run *run;
+};
+
+void *thr(void *arg)
+{
+       struct thread_context *tc = (struct thread_context *)arg;
+       int res;
+       int kvmcpu = tc->kvmcpu;
+       struct kvm_run *run = tc->run;
+
+       res = ioctl(kvmcpu, KVM_RUN, 0);
+       printf("ret1=%d exit_reason=%d suberror=%d\n",
+              res, run->exit_reason, run->internal.suberror);
+
+       return 0;
+}
+
+void test(void)
+{
+       int i, kvm, kvmvm, kvmcpu;
+       pthread_t th[NTHREAD];
+       struct kvm_run *run;
+       struct thread_context tc;
+
+       kvm = open("/dev/kvm", O_RDWR);
+       TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
+       kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
+       TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
+       kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
+       TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
+       run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
+                                    kvmcpu, 0);
+       tc.kvmcpu = kvmcpu;
+       tc.run = run;
+       srand(getpid());
+       for (i = 0; i < NTHREAD; i++) {
+               pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc);
+               usleep(rand() % 10000);
+       }
+       for (i = 0; i < NTHREAD; i++)
+               pthread_join(th[i], NULL);
+}
+
+int get_warnings_count(void)
+{
+       int warnings;
+       FILE *f;
+
+       f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
+       fscanf(f, "%d", &warnings);
+       fclose(f);
+
+       return warnings;
+}
+
+int main(void)
+{
+       int warnings_before, warnings_after;
+
+       if (!is_intel_cpu()) {
+               printf("Must be run on an Intel CPU, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+
+       if (vm_is_unrestricted_guest(NULL)) {
+               printf("Unrestricted guest must be disabled, skipping test\n");
+               exit(KSFT_SKIP);
+       }
+
+       warnings_before = get_warnings_count();
+
+       for (int i = 0; i < NPROCESS; ++i) {
+               int status;
+               int pid = fork();
+
+               if (pid < 0)
+                       exit(1);
+               if (pid == 0) {
+                       test();
+                       exit(0);
+               }
+               while (waitpid(pid, &status, __WALL) != pid)
+                       ;
+       }
+
+       warnings_after = get_warnings_count();
+       TEST_ASSERT(warnings_before == warnings_after,
+                   "Warnings found in kernel.  Run 'dmesg' to inspect them.");
+
+       return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 9d62e2c7e024..e64ca20b315a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -75,7 +75,7 @@ void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
                                u32 vmcs12_revision)
 {
        /* Set revision_id in vmcs12 to vmcs12_revision. */
-       memcpy(state->data, &vmcs12_revision, sizeof(u32));
+       memcpy(&state->data, &vmcs12_revision, sizeof(u32));
 }
 
 void set_default_state(struct kvm_nested_state *state)
@@ -95,9 +95,9 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
                       KVM_STATE_NESTED_EVMCS;
        state->format = 0;
        state->size = size;
-       state->vmx.vmxon_pa = 0x1000;
-       state->vmx.vmcs_pa = 0x2000;
-       state->vmx.smm.flags = 0;
+       state->hdr.vmx.vmxon_pa = 0x1000;
+       state->hdr.vmx.vmcs12_pa = 0x2000;
+       state->hdr.vmx.smm.flags = 0;
        set_revision_id_for_vmcs12(state, VMCS12_REVISION);
 }
 
@@ -123,39 +123,47 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        /*
         * We cannot virtualize anything if the guest does not have VMX
         * enabled.  We expect KVM_SET_NESTED_STATE to return 0 if vmxon_pa
-        * is set to -1ull.
+        * is set to -1ull, but the flags must be zero.
         */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmxon_pa = -1ull;
+       test_nested_state_expect_einval(vm, state);
+
+       state->hdr.vmx.vmcs12_pa = -1ull;
+       state->flags = KVM_STATE_NESTED_EVMCS;
+       test_nested_state_expect_einval(vm, state);
+
+       state->flags = 0;
        test_nested_state(vm, state);
 
        /* Enable VMX in the guest CPUID. */
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
-       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+       /*
+        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
+        * setting the nested state but flags other than eVMCS must be clear.
+        */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = -1ull;
-       state->vmx.smm.flags = 1;
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
        test_nested_state_expect_einval(vm, state);
 
-       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
-       set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = -1ull;
-       state->vmx.vmcs_pa = 0;
+       state->flags = KVM_STATE_NESTED_EVMCS;
+       test_nested_state(vm, state);
+
+       /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
+       state->hdr.vmx.smm.flags = 1;
        test_nested_state_expect_einval(vm, state);
 
-       /*
-        * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
-        * setting the nested state.
-        */
+       /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = -1ull;
-       state->vmx.vmcs_pa = -1ull;
-       test_nested_state(vm, state);
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->flags = 0;
+       test_nested_state_expect_einval(vm, state);
 
        /* It is invalid to have vmxon_pa set to a non-page aligned address. */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = 1;
+       state->hdr.vmx.vmxon_pa = 1;
        test_nested_state_expect_einval(vm, state);
 
        /*
@@ -165,7 +173,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
        set_default_vmx_state(state, state_sz);
        state->flags = KVM_STATE_NESTED_GUEST_MODE |
                      KVM_STATE_NESTED_RUN_PENDING;
-       state->vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
        test_nested_state_expect_einval(vm, state);
 
        /*
@@ -174,14 +182,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
         * KVM_STATE_NESTED_SMM_VMXON
         */
        set_default_vmx_state(state, state_sz);
-       state->vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
+       state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
                                KVM_STATE_NESTED_SMM_VMXON);
        test_nested_state_expect_einval(vm, state);
 
        /* Outside SMM, SMM flags must be zero. */
        set_default_vmx_state(state, state_sz);
        state->flags = 0;
-       state->vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
+       state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
        test_nested_state_expect_einval(vm, state);
 
        /* Size must be large enough to fit kvm_nested_state and vmcs12. */
@@ -191,8 +199,8 @@ void test_vmx_nested_state(struct kvm_vm *vm)
 
        /* vmxon_pa cannot be the same address as vmcs_pa. */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = 0;
-       state->vmx.vmcs_pa = 0;
+       state->hdr.vmx.vmxon_pa = 0;
+       state->hdr.vmx.vmcs12_pa = 0;
        test_nested_state_expect_einval(vm, state);
 
        /* The revision id for vmcs12 must be VMCS12_REVISION. */
@@ -205,16 +213,16 @@ void test_vmx_nested_state(struct kvm_vm *vm)
         * it again.
         */
        set_default_vmx_state(state, state_sz);
-       state->vmx.vmxon_pa = -1ull;
-       state->vmx.vmcs_pa = -1ull;
+       state->hdr.vmx.vmxon_pa = -1ull;
+       state->hdr.vmx.vmcs12_pa = -1ull;
        state->flags = 0;
        test_nested_state(vm, state);
        vcpu_nested_state_get(vm, VCPU_ID, state);
        TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
                    "Size must be between %d and %d.  The size returned was %d.",
                    sizeof(*state), state_sz, state->size);
-       TEST_ASSERT(state->vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
-       TEST_ASSERT(state->vmx.vmcs_pa == -1ull, "vmcs_pa must be -1ull.");
+       TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
+       TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
 
        free(state);
 }
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 05ddb6293b79..1be486d5d7cb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -309,14 +309,15 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
        }
 }
 
+/* Only called for a fully emulated timer */
 static void timer_emulate(struct arch_timer_context *ctx)
 {
        bool should_fire = kvm_timer_should_fire(ctx);
 
        trace_kvm_timer_emulate(ctx, should_fire);
 
-       if (should_fire) {
-               kvm_timer_update_irq(ctx->vcpu, true, ctx);
+       if (should_fire != ctx->irq.level) {
+               kvm_timer_update_irq(ctx->vcpu, should_fire, ctx);
                return;
        }
 
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 44ceaccb18cf..8c9fe831bce4 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1734,6 +1734,7 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
        mutex_unlock(&its->its_lock);
 
        kfree(its);
+       kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */
 }
 
 static int vgic_its_has_attr_regs(struct kvm_device *dev,
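For the vgic-its leak fix at the end: kvm_ioctl_create_device() allocates the struct kvm_device, and the ownership convention the fix applies is that the device's .destroy hook frees it along with any per-device state. A sketch of the shape of that contract (the callback name is ours, for illustration only):

        #include <linux/kvm_host.h>
        #include <linux/slab.h>

        /* Hypothetical kvm_device_ops .destroy callback following the same
         * rule vgic_its_destroy now does: free the device-specific payload,
         * then the struct kvm_device that kvm_ioctl_create_device()
         * allocated. */
        static void example_device_destroy(struct kvm_device *dev)
        {
                kfree(dev->private);    /* per-device state, if any */
                kfree(dev);             /* the struct kvm_device itself */
        }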