diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-12 11:34:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-12 11:34:04 -0700 |
commit | b357bf6023a948cf6a9472f07a1b0caac0e4f8e8 (patch) | |
tree | 1471a2691cd56e8640cf6ad51e255b54903a164b /virt | |
parent | 0725d4e1b8b08a60838db3a6e65c23ea8824a048 (diff) | |
parent | 766d3571d8e50d3a73b77043dc632226f9e6b389 (diff) | |
download | lwn-b357bf6023a948cf6a9472f07a1b0caac0e4f8e8.tar.gz lwn-b357bf6023a948cf6a9472f07a1b0caac0e4f8e8.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"Small update for KVM:
ARM:
- lazy context-switching of FPSIMD registers on arm64
- "split" regions for vGIC redistributor
s390:
- cleanups for nested
- clock handling
- crypto
- storage keys
- control register bits
x86:
- many bugfixes
- implement more Hyper-V super powers
- implement lapic_timer_advance_ns even when the LAPIC timer is
emulated using the processor's VMX preemption timer.
- two security-related bugfixes at the top of the branch"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (79 commits)
kvm: fix typo in flag name
kvm: x86: use correct privilege level for sgdt/sidt/fxsave/fxrstor access
KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and kvm_write_guest_virt_system
KVM: x86: introduce linear_{read,write}_system
kvm: nVMX: Enforce cpl=0 for VMX instructions
kvm: nVMX: Add support for "VMWRITE to any supported field"
kvm: nVMX: Restrict VMX capability MSR changes
KVM: VMX: Optimize tscdeadline timer latency
KVM: docs: nVMX: Remove known limitations as they do not exist now
KVM: docs: mmu: KVM support exposing SLAT to guests
kvm: no need to check return value of debugfs_create functions
kvm: Make VM ioctl do valloc for some archs
kvm: Change return type to vm_fault_t
KVM: docs: mmu: Fix link to NPT presentation from KVM Forum 2008
kvm: x86: Amend the KVM_GET_SUPPORTED_CPUID API documentation
KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability
KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX implementation
KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation
KVM: introduce kvm_make_vcpus_request_mask() API
KVM: x86: hyperv: do rep check for each hypercall separately
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 32 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-debug.c | 17 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 100 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-kvm-device.c | 53 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 112 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 99 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 46 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 83 |
9 files changed, 404 insertions, 141 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index cca7e065a075..72143cfaf6ec 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -54,3 +54,6 @@ config HAVE_KVM_IRQ_BYPASS config HAVE_KVM_VCPU_ASYNC_IOCTL bool + +config HAVE_KVM_VCPU_RUN_PID_CHANGE + bool diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2d9b4795edb2..04e554cae3a2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/bug.h> #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> @@ -41,6 +42,7 @@ #include <asm/mman.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/virt.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> @@ -163,7 +165,7 @@ int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) return 0; } -int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; } @@ -249,6 +251,21 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } +struct kvm *kvm_arch_alloc_vm(void) +{ + if (!has_vhe()) + return kzalloc(sizeof(struct kvm), GFP_KERNEL); + + return vzalloc(sizeof(struct kvm)); +} + +void kvm_arch_free_vm(struct kvm *kvm) +{ + if (!has_vhe()) + kfree(kvm); + else + vfree(kvm); +} struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { @@ -290,7 +307,6 @@ out: void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { - kvm_vgic_vcpu_early_init(vcpu); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) @@ -363,10 +379,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); + kvm_arch_vcpu_load_fp(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_arch_vcpu_put_fp(vcpu); kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); @@ -678,9 +696,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ preempt_disable(); - /* Flush FP/SIMD state that can't survive guest entry/exit */ - kvm_fpsimd_flush_cpu_state(); - kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -778,6 +793,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (static_branch_unlikely(&userspace_irqchip_in_use)) kvm_timer_sync_hwstate(vcpu); + kvm_arch_vcpu_ctxsync_fp(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -1574,6 +1591,11 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (!kvm_arch_check_sve_has_vhe()) { + kvm_pr_unimpl("SVE system without VHE unsupported. Broken cpu?"); + return -ENODEV; + } + for_each_online_cpu(cpu) { smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); if (ret < 0) { diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 4ffc0b5e6105..c589d4c2b478 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -264,21 +264,12 @@ static const struct file_operations vgic_debug_fops = { .release = seq_release }; -int vgic_debug_init(struct kvm *kvm) +void vgic_debug_init(struct kvm *kvm) { - if (!kvm->debugfs_dentry) - return -ENOENT; - - if (!debugfs_create_file("vgic-state", 0444, - kvm->debugfs_dentry, - kvm, - &vgic_debug_fops)) - return -ENOMEM; - - return 0; + debugfs_create_file("vgic-state", 0444, kvm->debugfs_dentry, kvm, + &vgic_debug_fops); } -int vgic_debug_destroy(struct kvm *kvm) +void vgic_debug_destroy(struct kvm *kvm) { - return 0; } diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index e07156c30323..2673efce65f3 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -44,7 +44,7 @@ * * CPU Interface: * - * - kvm_vgic_vcpu_early_init(): initialization of static data that + * - kvm_vgic_vcpu_init(): initialization of static data that * doesn't depend on any sizing information or emulation type. No * allocation is allowed there. */ @@ -67,46 +67,6 @@ void kvm_vgic_early_init(struct kvm *kvm) spin_lock_init(&dist->lpi_list_lock); } -/** - * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures - * @vcpu: The VCPU whose VGIC data structures whould be initialized - * - * Only do initialization, but do not actually enable the VGIC CPU interface - * yet. - */ -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int i; - - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); - - /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. - */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->intid = i; - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; - kref_init(&irq->refcount); - if (vgic_irq_is_sgi(i)) { - /* SGIs */ - irq->enabled = 1; - irq->config = VGIC_CONFIG_EDGE; - } else { - /* PPIs */ - irq->config = VGIC_CONFIG_LEVEL; - } - } -} - /* CREATION */ /** @@ -167,8 +127,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + else + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { @@ -221,13 +184,50 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) } /** - * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface */ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { - int ret = 0; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + int i; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + spin_lock_init(&vgic_cpu->ap_list_lock); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } if (!irqchip_in_kernel(vcpu->kvm)) return 0; @@ -303,13 +303,23 @@ out: static void kvm_vgic_dist_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_redist_region *rdreg, *next; dist->ready = false; dist->initialized = false; kfree(dist->spis); + dist->spis = NULL; dist->nr_spis = 0; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) { + list_del(&rdreg->list); + kfree(rdreg); + } + INIT_LIST_HEAD(&dist->rd_regions); + } + if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); } diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 10ae6f394b71..6ada2432e37c 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -66,6 +66,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; phys_addr_t *addr_ptr, alignment; + u64 undef_value = VGIC_ADDR_UNDEF; mutex_lock(&kvm->lock); switch (type) { @@ -84,16 +85,61 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; break; - case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: { + struct vgic_redist_region *rdreg; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); if (r) break; if (write) { - r = vgic_v3_set_redist_base(kvm, *addr); + r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); goto out; } - addr_ptr = &vgic->vgic_redist_base; + rdreg = list_first_entry(&vgic->rd_regions, + struct vgic_redist_region, list); + if (!rdreg) + addr_ptr = &undef_value; + else + addr_ptr = &rdreg->base; break; + } + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + { + struct vgic_redist_region *rdreg; + u8 index; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + + index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + + if (write) { + gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) + >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; + u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) + >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + + if (!count || flags) + r = -EINVAL; + else + r = vgic_v3_set_redist_base(kvm, index, + base, count); + goto out; + } + + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + if (!rdreg) { + r = -ENOENT; + goto out; + } + + *addr = index; + *addr |= rdreg->base; + *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + goto out; + } default: r = -ENODEV; } @@ -665,6 +711,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_VGIC_V3_ADDR_TYPE_DIST: case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: return 0; } break; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 671fe81f8e1d..287784095b5b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -184,12 +184,17 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_redist_region *rdreg = vgic_cpu->rdreg; int target_vcpu_id = vcpu->vcpu_id; + gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; u64 value; value = (u64)(mpidr & GENMASK(23, 0)) << 32; value |= ((target_vcpu_id & 0xffff) << 8); - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + + if (addr == last_rdist_typer) value |= GICR_TYPER_LAST; if (vgic_has_its(vcpu->kvm)) value |= GICR_TYPER_PLPIS; @@ -580,24 +585,32 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; + struct vgic_redist_region *rdreg; gpa_t rd_base, sgi_base; int ret; + if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) + return 0; + /* * We may be creating VCPUs before having set the base address for the * redistributor region, in which case we will come back to this * function for all VCPUs when the base address is set. Just return * without doing any work for now. */ - if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base)) + rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); + if (!rdreg) return 0; if (!vgic_v3_check_base(kvm)) return -EINVAL; - rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset; + vgic_cpu->rdreg = rdreg; + + rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; sgi_base = rd_base + SZ_64K; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); @@ -631,7 +644,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) goto out; } - vgic->vgic_redist_free_offset += 2 * SZ_64K; + rdreg->free_index++; out: mutex_unlock(&kvm->slots_lock); return ret; @@ -670,23 +683,96 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) return ret; } -int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr) +/** + * vgic_v3_insert_redist_region - Insert a new redistributor region + * + * Performs various checks before inserting the rdist region in the list. + * Those tests depend on whether the size of the rdist region is known + * (ie. count != 0). The list is sorted by rdist region index. + * + * @kvm: kvm handle + * @index: redist region index + * @base: base of the new rdist region + * @count: number of redistributors the region is made of (0 in the old style + * single region, whose size is induced from the number of vcpus) + * + * Return 0 on success, < 0 otherwise + */ +static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index, + gpa_t base, uint32_t count) { - struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + struct list_head *rd_regions = &d->rd_regions; + size_t size = count * KVM_VGIC_V3_REDIST_SIZE; int ret; - /* vgic_check_ioaddr makes sure we don't do this twice */ - ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K); - if (ret) - return ret; + /* single rdist region already set ?*/ + if (!count && !list_empty(rd_regions)) + return -EINVAL; - vgic->vgic_redist_base = addr; - if (!vgic_v3_check_base(kvm)) { - vgic->vgic_redist_base = VGIC_ADDR_UNDEF; + /* cross the end of memory ? */ + if (base + size < base) return -EINVAL; + + if (list_empty(rd_regions)) { + if (index != 0) + return -EINVAL; + } else { + rdreg = list_last_entry(rd_regions, + struct vgic_redist_region, list); + if (index != rdreg->index + 1) + return -EINVAL; + + /* Cannot add an explicitly sized regions after legacy region */ + if (!rdreg->count) + return -EINVAL; } /* + * For legacy single-region redistributor regions (!count), + * check that the redistributor region does not overlap with the + * distributor's address space. + */ + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + vgic_dist_overlap(kvm, base, size)) + return -EINVAL; + + /* collision with any other rdist region? */ + if (vgic_v3_rdist_overlap(kvm, base, size)) + return -EINVAL; + + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + if (!rdreg) + return -ENOMEM; + + rdreg->base = VGIC_ADDR_UNDEF; + + ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + if (ret) + goto free; + + rdreg->base = base; + rdreg->count = count; + rdreg->free_index = 0; + rdreg->index = index; + + list_add_tail(&rdreg->list, rd_regions); + return 0; +free: + kfree(rdreg); + return ret; +} + +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) +{ + int ret; + + ret = vgic_v3_insert_redist_region(kvm, index, addr, count); + if (ret) + return ret; + + /* * Register iodevs for each existing VCPU. Adding more VCPUs * afterwards will register the iodevs when needed. */ diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index bdcf8e7a6161..ff7dc890941a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -419,6 +419,29 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) return 0; } +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + /* * Check for overlapping regions and for regions crossing the end of memory * for base addresses which have already been set. @@ -426,41 +449,83 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) bool vgic_v3_check_base(struct kvm *kvm) { struct vgic_dist *d = &kvm->arch.vgic; - gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; - - redist_size *= atomic_read(&kvm->online_vcpus); + struct vgic_redist_region *rdreg; if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) return false; - if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) && - d->vgic_redist_base + redist_size < d->vgic_redist_base) - return false; + list_for_each_entry(rdreg, &d->rd_regions, list) { + if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < + rdreg->base) + return false; + } - /* Both base addresses must be set to check if they overlap */ - if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(d->vgic_redist_base)) + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) return true; - if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) - return true; - if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) - return true; + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} - return false; +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor regions maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in the index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; } +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + int vgic_v3_map_resources(struct kvm *kvm) { - int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0; + int c; if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %d redistributor base not set\n", c); + ret = -ENXIO; + goto out; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { kvm_err("Need to set vgic distributor addresses first\n"); ret = -ENXIO; goto out; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 32c25d42c93f..ead00b2072b2 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -96,6 +96,13 @@ /* we only support 64 kB translation table page size */ #define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0) +#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12) +#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12 +#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) +#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 + /* Requires the irq_lock to be held by the caller. */ static inline bool irq_is_pending(struct vgic_irq *irq) { @@ -215,7 +222,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); -int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr); +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); @@ -243,8 +250,8 @@ void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); -int vgic_debug_init(struct kvm *kvm); -int vgic_debug_destroy(struct kvm *kvm); +void vgic_debug_init(struct kvm *kvm); +void vgic_debug_destroy(struct kvm *kvm); bool lock_all_vcpus(struct kvm *kvm); void unlock_all_vcpus(struct kvm *kvm); @@ -265,6 +272,39 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) } } +static inline bool +vgic_v3_redist_region_full(struct vgic_redist_region *region) +{ + if (!region->count) + return false; + + return (region->free_index >= region->count); +} + +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs); + +static inline size_t +vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) +{ + if (!rdreg->count) + return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE; + else + return rdreg->count * KVM_VGIC_V3_REDIST_SIZE; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index); + +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); + +static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + + return (base + size > d->vgic_dist_base) && + (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); +} + int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c7b2e927f699..aa7da1d8ece2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -203,29 +203,47 @@ static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) return true; } -bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, + unsigned long *vcpu_bitmap, cpumask_var_t tmp) { int i, cpu, me; - cpumask_var_t cpus; - bool called; struct kvm_vcpu *vcpu; - - zalloc_cpumask_var(&cpus, GFP_ATOMIC); + bool called; me = get_cpu(); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!test_bit(i, vcpu_bitmap)) + continue; + kvm_make_request(req, vcpu); cpu = vcpu->cpu; if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu)) continue; - if (cpus != NULL && cpu != -1 && cpu != me && + if (tmp != NULL && cpu != -1 && cpu != me && kvm_request_needs_ipi(vcpu, req)) - __cpumask_set_cpu(cpu, cpus); + __cpumask_set_cpu(cpu, tmp); } - called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); + + called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT)); put_cpu(); + + return called; +} + +bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) +{ + cpumask_var_t cpus; + bool called; + static unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] + = {[0 ... BITS_TO_LONGS(KVM_MAX_VCPUS)-1] = ULONG_MAX}; + + zalloc_cpumask_var(&cpus, GFP_ATOMIC); + + called = kvm_make_vcpus_request_mask(kvm, req, vcpu_bitmap, cpus); + free_cpumask_var(cpus); return called; } @@ -572,10 +590,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, - kvm_debugfs_dir); - if (!kvm->debugfs_dentry) - return -ENOMEM; + kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), @@ -591,11 +606,8 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - if (!debugfs_create_file(p->name, 0644, - kvm->debugfs_dentry, - stat_data, - stat_fops_per_vm[p->kind])) - return -ENOMEM; + debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, + stat_data, stat_fops_per_vm[p->kind]); } return 0; } @@ -2340,7 +2352,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) } EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); -static int kvm_vcpu_fault(struct vm_fault *vmf) +static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf) { struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data; struct page *page; @@ -2550,8 +2562,13 @@ static long kvm_vcpu_ioctl(struct file *filp, oldpid = rcu_access_pointer(vcpu->pid); if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ - struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + struct pid *newpid; + + r = kvm_arch_vcpu_run_pid_change(vcpu); + if (r) + break; + newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); if (oldpid) synchronize_rcu(); @@ -3896,29 +3913,18 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) kfree(env); } -static int kvm_init_debug(void) +static void kvm_init_debug(void) { - int r = -EEXIST; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); - if (kvm_debugfs_dir == NULL) - goto out; kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir, - (void *)(long)p->offset, - stat_fops[p->kind])) - goto out_dir; + debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + (void *)(long)p->offset, + stat_fops[p->kind]); } - - return 0; - -out_dir: - debugfs_remove_recursive(kvm_debugfs_dir); -out: - return r; } static int kvm_suspend(void) @@ -4046,20 +4052,13 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; - r = kvm_init_debug(); - if (r) { - pr_err("kvm: create debugfs files failed\n"); - goto out_undebugfs; - } + kvm_init_debug(); r = kvm_vfio_ops_init(); WARN_ON(r); return 0; -out_undebugfs: - unregister_syscore_ops(&kvm_syscore_ops); - misc_deregister(&kvm_dev); out_unreg: kvm_async_pf_deinit(); out_free: |