diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-24 19:44:25 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-24 19:44:25 -1000 |
commit | 7753ea09640807104c8e353f6d5dc47ee55307cf (patch) | |
tree | 7db719affec0564593ed85a283de8fd9464177d3 /virt | |
parent | 83ada0319606c6bbaebda16fad456e37ed64d518 (diff) | |
parent | d02fcf50779ec9d8eb7a81473fd76efe3f04b3a5 (diff) | |
download | lwn-7753ea09640807104c8e353f6d5dc47ee55307cf.tar.gz lwn-7753ea09640807104c8e353f6d5dc47ee55307cf.zip |
Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"Trimmed second batch of KVM changes for Linux 4.15:
- GICv4 Support for KVM/ARM
- re-introduce support for CPUs without virtual NMI (cc stable) and
allow testing of KVM without virtual NMI on available CPUs
- fix long-standing performance issues with assigned devices on AMD
(cc stable)"
* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
kvm: vmx: Allow disabling virtual NMI support
kvm: vmx: Reinstate support for CPUs without virtual NMI
KVM: SVM: obey guest PAT
KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
KVM: arm/arm64: Fix GICv4 ITS initialization issues
KVM: arm/arm64: GICv4: Theory of operations
KVM: arm/arm64: GICv4: Enable VLPI support
KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
KVM: arm/arm64: GICv4: Add doorbell interrupt handling
KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 24 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 48 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v3-sr.c | 9 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 7 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 204 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio-v3.c | 5 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 14 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v4.c | 364 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 67 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 10 |
10 files changed, 641 insertions, 111 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4db54ff08d9e..4151250ce8da 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - struct irq_desc *desc; - struct irq_data *data; - int phys_irq; int ret; if (timer->enabled) @@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - /* - * Find the physical IRQ number corresponding to the host_vtimer_irq - */ - desc = irq_to_desc(host_vtimer_irq); - if (!desc) { - kvm_err("%s: no interrupt descriptor\n", __func__); - return -EINVAL; - } - - data = irq_desc_get_irq_data(desc); - while (data->parent_data) - data = data->parent_data; - - phys_irq = data->hwirq; - - /* - * Tell the VGIC that the virtual interrupt is tied to a - * physical interrupt. We do that once per VCPU. - */ - ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq); + ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); if (ret) return ret; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 772bf74ac2e9..a6524ff27de4 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -27,6 +27,8 @@ #include <linux/mman.h> #include <linux/sched.h> #include <linux/kvm.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> #include <trace/events/kvm.h> #include <kvm/arm_pmu.h> @@ -175,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; + kvm_vgic_destroy(kvm); + free_percpu(kvm->arch.last_vcpu_ran); kvm->arch.last_vcpu_ran = NULL; @@ -184,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } - - kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -313,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { kvm_timer_schedule(vcpu); + kvm_vgic_v4_enable_doorbell(vcpu); } void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { kvm_timer_unschedule(vcpu); + kvm_vgic_v4_disable_doorbell(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -1450,6 +1454,46 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) return NULL; } +bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} + +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_halt_guest(irqfd->kvm); +} + +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_resume_guest(irqfd->kvm); +} + /** * Initialize Hyp-mode and memory mappings on all CPUs. */ diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 91728faa13fd..f5c3d6d7019e 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); cpu_if->vgic_elrsr = 0xffff; @@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) /* * If we need to trap system registers, we must write * ICH_HCR_EL2 anyway, even if no interrupts are being - * injected, + * injected. Same thing if GICv4 is used, as VLPI + * delivery is gated by ICH_HCR_EL2.En. */ - if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 5801261f3add..62310122ee78 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -285,6 +285,10 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + ret = vgic_v4_init(kvm); + if (ret) + goto out; + kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_enable(vcpu); @@ -320,6 +324,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) kfree(dist->spis); dist->nr_spis = 0; + + if (vgic_supports_direct_msis(kvm)) + vgic_v4_teardown(kvm); } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d2a99ab0ade7..1f761a9991e7 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its); static int vgic_its_restore_tables_v0(struct vgic_its *its); static int vgic_its_commit_v0(struct vgic_its *its); static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, - struct kvm_vcpu *filter_vcpu); + struct kvm_vcpu *filter_vcpu, bool needs_inv); /* * Creates a new (reference to a) struct vgic_irq for a given LPI. @@ -106,7 +106,7 @@ out_unlock: * However we only have those structs for mapped IRQs, so we read in * the respective config data from memory here upon mapping the LPI. */ - ret = update_lpi_config(kvm, irq, NULL); + ret = update_lpi_config(kvm, irq, NULL, false); if (ret) return ERR_PTR(ret); @@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id) * VCPU. Unconditionally applies if filter_vcpu is NULL. */ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, - struct kvm_vcpu *filter_vcpu) + struct kvm_vcpu *filter_vcpu, bool needs_inv) { u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); u8 prop; @@ -292,11 +292,17 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, irq->priority = LPI_PROP_PRIORITY(prop); irq->enabled = LPI_PROP_ENABLE_BIT(prop); - vgic_queue_irq_unlock(kvm, irq, flags); - } else { - spin_unlock_irqrestore(&irq->irq_lock, flags); + if (!irq->hw) { + vgic_queue_irq_unlock(kvm, irq, flags); + return 0; + } } + spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); + return 0; } @@ -336,6 +342,29 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) return i; } +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ + int ret = 0; + + spin_lock(&irq->irq_lock); + irq->target_vcpu = vcpu; + spin_unlock(&irq->irq_lock); + + if (irq->hw) { + struct its_vlpi_map map; + + ret = its_get_vlpi(irq->host_irq, &map); + if (ret) + return ret; + + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_map_vlpi(irq->host_irq, &map); + } + + return ret; +} + /* * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI * is targeting) to the VGIC's view, which deals with target VCPUs. @@ -350,10 +379,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) return; vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); - - spin_lock(&ite->irq->irq_lock); - ite->irq->target_vcpu = vcpu; - spin_unlock(&ite->irq->irq_lock); + update_affinity(ite->irq, vcpu); } /* @@ -505,19 +531,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, return 0; } -/* - * Find the target VCPU and the LPI number for a given devid/eventid pair - * and make this IRQ pending, possibly injecting it. - * Must be called with the its_lock mutex held. - * Returns 0 on success, a positive error value for any ITS mapping - * related errors and negative error values for generic errors. - */ -static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, - u32 devid, u32 eventid) +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq) { struct kvm_vcpu *vcpu; struct its_ite *ite; - unsigned long flags; if (!its->enabled) return -EBUSY; @@ -533,26 +551,65 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, if (!vcpu->arch.vgic_cpu.lpis_enabled) return -EBUSY; - spin_lock_irqsave(&ite->irq->irq_lock, flags); - ite->irq->pending_latch = true; - vgic_queue_irq_unlock(kvm, ite->irq, flags); - + *irq = ite->irq; return 0; } -static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) { + u64 address; + struct kvm_io_device *kvm_io_dev; struct vgic_io_device *iodev; - if (dev->ops != &kvm_io_gic_ops) - return NULL; + if (!vgic_has_its(kvm)) + return ERR_PTR(-ENODEV); - iodev = container_of(dev, struct vgic_io_device, dev); + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return ERR_PTR(-EINVAL); + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); + + if (kvm_io_dev->ops != &kvm_io_gic_ops) + return ERR_PTR(-EINVAL); + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); if (iodev->iodev_type != IODEV_ITS) - return NULL; + return ERR_PTR(-EINVAL); + + return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. + */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct vgic_irq *irq = NULL; + unsigned long flags; + int err; + + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); + if (err) + return err; + + if (irq->hw) + return irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, true); + + spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); - return iodev; + return 0; } /* @@ -563,30 +620,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) */ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) { - u64 address; - struct kvm_io_device *kvm_io_dev; - struct vgic_io_device *iodev; + struct vgic_its *its; int ret; - if (!vgic_has_its(kvm)) - return -ENODEV; - - if (!(msi->flags & KVM_MSI_VALID_DEVID)) - return -EINVAL; + its = vgic_msi_to_its(kvm, msi); + if (IS_ERR(its)) + return PTR_ERR(its); - address = (u64)msi->address_hi << 32 | msi->address_lo; - - kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); - if (!kvm_io_dev) - return -EINVAL; - - iodev = vgic_get_its_iodev(kvm_io_dev); - if (!iodev) - return -EINVAL; - - mutex_lock(&iodev->its->its_lock); - ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); - mutex_unlock(&iodev->its->its_lock); + mutex_lock(&its->its_lock); + ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); + mutex_unlock(&its->its_lock); if (ret < 0) return ret; @@ -608,8 +651,12 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite) list_del(&ite->ite_list); /* This put matches the get in vgic_add_lpi. */ - if (ite->irq) + if (ite->irq) { + if (ite->irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + vgic_put_irq(kvm, ite->irq); + } kfree(ite); } @@ -683,11 +730,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, ite->collection = collection; vcpu = kvm_get_vcpu(kvm, collection->target_addr); - spin_lock(&ite->irq->irq_lock); - ite->irq->target_vcpu = vcpu; - spin_unlock(&ite->irq->irq_lock); - - return 0; + return update_affinity(ite->irq, vcpu); } /* @@ -1054,6 +1097,10 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, ite->irq->pending_latch = false; + if (ite->irq->hw) + return irq_set_irqchip_state(ite->irq->host_irq, + IRQCHIP_STATE_PENDING, false); + return 0; } @@ -1073,7 +1120,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, if (!ite) return E_ITS_INV_UNMAPPED_INTERRUPT; - return update_lpi_config(kvm, ite->irq, NULL); + return update_lpi_config(kvm, ite->irq, NULL, true); } /* @@ -1108,12 +1155,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, irq = vgic_get_irq(kvm, NULL, intids[i]); if (!irq) continue; - update_lpi_config(kvm, irq, vcpu); + update_lpi_config(kvm, irq, vcpu, false); vgic_put_irq(kvm, irq); } kfree(intids); + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + return 0; } @@ -1128,11 +1178,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, u64 *its_cmd) { - struct vgic_dist *dist = &kvm->arch.vgic; u32 target1_addr = its_cmd_get_target_addr(its_cmd); u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); struct kvm_vcpu *vcpu1, *vcpu2; struct vgic_irq *irq; + u32 *intids; + int irq_count, i; if (target1_addr >= atomic_read(&kvm->online_vcpus) || target2_addr >= atomic_read(&kvm->online_vcpus)) @@ -1144,19 +1195,19 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, vcpu1 = kvm_get_vcpu(kvm, target1_addr); vcpu2 = kvm_get_vcpu(kvm, target2_addr); - spin_lock(&dist->lpi_list_lock); + irq_count = vgic_copy_lpi_list(vcpu1, &intids); + if (irq_count < 0) + return irq_count; - list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { - spin_lock(&irq->irq_lock); + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); - if (irq->target_vcpu == vcpu1) - irq->target_vcpu = vcpu2; + update_affinity(irq, vcpu2); - spin_unlock(&irq->irq_lock); + vgic_put_irq(kvm, irq); } - spin_unlock(&dist->lpi_list_lock); - + kfree(intids); return 0; } @@ -1634,6 +1685,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (!its) return -ENOMEM; + if (vgic_initialized(dev->kvm)) { + int ret = vgic_v4_init(dev->kvm); + if (ret < 0) { + kfree(its); + return ret; + } + } + mutex_init(&its->its_lock); mutex_init(&its->cmd_lock); @@ -1946,6 +2005,15 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) list_for_each_entry(ite, &device->itt_head, ite_list) { gpa_t gpa = base + ite->event_id * ite_esz; + /* + * If an LPI carries the HW bit, this means that this + * interrupt is controlled by GICv4, and we do not + * have direct access to that state. Let's simply fail + * the save operation... + */ + if (ite->irq->hw) + return -EACCES; + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); if (ret) return ret; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 83786108829e..671fe81f8e1d 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -54,6 +54,11 @@ bool vgic_has_its(struct kvm *kvm) return dist->has_its; } +bool vgic_supports_direct_msis(struct kvm *kvm) +{ + return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm); +} + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 863351c090d8..2f05f732d3fd 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -24,6 +24,7 @@ static bool group0_trap; static bool group1_trap; static bool common_trap; +static bool gicv4_enable; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { @@ -461,6 +462,12 @@ static int __init early_common_trap_cfg(char *buf) } early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); +static int __init early_gicv4_enable(char *buf) +{ + return strtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -480,6 +487,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.can_emulate_gicv2 = false; kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; + /* GICv4 support? */ + if (info->has_v4) { + kvm_vgic_global_state.has_gicv4 = gicv4_enable; + kvm_info("GICv4 support %sabled\n", + gicv4_enable ? "en" : "dis"); + } + if (!info->vcpu.start) { kvm_info("GICv3: no GICV resource entry\n"); kvm_vgic_global_state.vcpu_base = 0; diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c new file mode 100644 index 000000000000..53c324aa44ef --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-v4.c @@ -0,0 +1,364 @@ +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kvm_host.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include "vgic.h" + +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in a irq_set_irqchip_state(), which + * generates an INT on the corresponding VLPI. + * - The CLEAR command results in a irq_set_irqchip_state(), which + * generates an CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + * kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + * the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + * the handling of MOVI). MOVALL is horrible. + * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue a irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_schedule_vpe(). You + * must be in a non-preemptible context. On exit, another call to + * its_schedule_vpe() tells the redistributor that we're done with the + * vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + +#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ + struct kvm_vcpu *vcpu = info; + + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return IRQ_HANDLED; +} + +/** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm: Pointer to the VM being initialized + * + * We may be called each time a vITS is created, or when the + * vgic is initialized. This relies on kvm->lock to be + * held. In both cases, the number of vcpus should now be + * fixed. + */ +int vgic_v4_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, nr_vcpus, ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; /* Nothing to see here... move along. */ + + if (dist->its_vm.vpes) + return 0; + + nr_vcpus = atomic_read(&kvm->online_vcpus); + + dist->its_vm.vpes = kzalloc(sizeof(*dist->its_vm.vpes) * nr_vcpus, + GFP_KERNEL); + if (!dist->its_vm.vpes) + return -ENOMEM; + + dist->its_vm.nr_vpes = nr_vcpus; + + kvm_for_each_vcpu(i, vcpu, kvm) + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_alloc_vcpu_irqs(&dist->its_vm); + if (ret < 0) { + kvm_err("VPE IRQ allocation failure\n"); + kfree(dist->its_vm.vpes); + dist->its_vm.nr_vpes = 0; + dist->its_vm.vpes = NULL; + return ret; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + int irq = dist->its_vm.vpes[i]->irq; + + /* + * Don't automatically enable the doorbell, as we're + * flipping it back and forth when the vcpu gets + * blocked. Also disable the lazy disabling, as the + * doorbell could kick us out of the guest too + * early... + */ + irq_set_status_flags(irq, DB_IRQ_FLAGS); + ret = request_irq(irq, vgic_v4_doorbell_handler, + 0, "vcpu", vcpu); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* + * Trick: adjust the number of vpes so we know + * how many to nuke on teardown... + */ + dist->its_vm.nr_vpes = i; + break; + } + } + + if (ret) + vgic_v4_teardown(kvm); + + return ret; +} + +/** + * vgic_v4_teardown - Free the GICv4 data structures + * @kvm: Pointer to the VM being destroyed + * + * Relies on kvm->lock to be held. + */ +void vgic_v4_teardown(struct kvm *kvm) +{ + struct its_vm *its_vm = &kvm->arch.vgic.its_vm; + int i; + + if (!its_vm->vpes) + return; + + for (i = 0; i < its_vm->nr_vpes; i++) { + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); + int irq = its_vm->vpes[i]->irq; + + irq_clear_status_flags(irq, DB_IRQ_FLAGS); + free_irq(irq, vcpu); + } + + its_free_vcpu_irqs(its_vm); + kfree(its_vm->vpes); + its_vm->nr_vpes = 0; + its_vm->vpes = NULL; +} + +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu) +{ + if (!vgic_supports_direct_msis(vcpu->kvm)) + return 0; + + return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false); +} + +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu) +{ + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + int err; + + if (!vgic_supports_direct_msis(vcpu->kvm)) + return 0; + + /* + * Before making the VPE resident, make sure the redistributor + * corresponding to our current CPU expects us here. See the + * doc in drivers/irqchip/irq-gic-v4.c to understand how this + * turns into a VMOVP command at the ITS level. + */ + err = irq_set_affinity(irq, cpumask_of(smp_processor_id())); + if (err) + return err; + + err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true); + if (err) + return err; + + /* + * Now that the VPE is resident, let's get rid of a potential + * doorbell interrupt that would still be pending. + */ + err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false); + + return err; +} + +static struct vgic_its *vgic_get_its(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct kvm_msi msi = (struct kvm_msi) { + .address_lo = irq_entry->msi.address_lo, + .address_hi = irq_entry->msi.address_hi, + .data = irq_entry->msi.data, + .flags = irq_entry->msi.flags, + .devid = irq_entry->msi.devid, + }; + + return vgic_msi_to_its(kvm, &msi); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + struct its_vlpi_map map; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + /* Perform then actual DevID/EventID -> LPI translation. */ + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + /* + * Emit the mapping request. If it fails, the ITS probably + * isn't v4 compatible, so let's silently bail out. Holding + * the ITS lock should ensure that nothing can modify the + * target vcpu. + */ + map = (struct its_vlpi_map) { + .vm = &kvm->arch.vgic.its_vm, + .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, + .vintid = irq->intid, + .properties = ((irq->priority & 0xfc) | + (irq->enabled ? LPI_PROP_ENABLED : 0) | + LPI_PROP_GROUP1), + .db_enabled = true, + }; + + ret = its_map_vlpi(virq, &map); + if (ret) + goto out; + + irq->hw = true; + irq->host_irq = virq; + +out: + mutex_unlock(&its->its_lock); + return ret; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + WARN_ON(!(irq->hw && irq->host_irq == virq)); + irq->hw = false; + ret = its_unmap_vlpi(virq); + +out: + mutex_unlock(&its->its_lock); + return ret; +} + +void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu) +{ + if (vgic_supports_direct_msis(vcpu->kvm)) { + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + if (irq) + enable_irq(irq); + } +} + +void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu) +{ + if (vgic_supports_direct_msis(vcpu->kvm)) { + int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq; + if (irq) + disable_irq(irq); + } +} diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e54ef2fdf73d..b168a328a9e0 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -17,6 +17,8 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <linux/list_sort.h> +#include <linux/interrupt.h> +#include <linux/irq.h> #include "vgic.h" @@ -409,25 +411,56 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, return 0; } -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + unsigned int host_irq) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + struct irq_desc *desc; + struct irq_data *data; + + /* + * Find the physical IRQ number corresponding to @host_irq + */ + desc = irq_to_desc(host_irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return -EINVAL; + } + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + irq->hw = true; + irq->host_irq = host_irq; + irq->hwintid = data->hwirq; + return 0; +} + +/* @irq->irq_lock must be held */ +static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ + irq->hw = false; + irq->hwintid = 0; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, + u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; + int ret; BUG_ON(!irq); spin_lock_irqsave(&irq->irq_lock, flags); - - irq->hw = true; - irq->hwintid = phys_irq; - + ret = kvm_vgic_map_irq(vcpu, irq, host_irq); spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); - return 0; + return ret; } -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) { struct vgic_irq *irq; unsigned long flags; @@ -435,14 +468,11 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; - irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); BUG_ON(!irq); spin_lock_irqsave(&irq->irq_lock, flags); - - irq->hw = false; - irq->hwintid = 0; - + kvm_vgic_unmap_irq(irq); spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); @@ -688,6 +718,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + WARN_ON(vgic_v4_sync_hwstate(vcpu)); + /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; @@ -700,6 +732,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { + WARN_ON(vgic_v4_flush_hwstate(vcpu)); + /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without @@ -751,6 +785,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) if (!vcpu->kvm->arch.vgic.enabled) return false; + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) + return true; + spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { @@ -784,9 +821,9 @@ void vgic_kick_vcpus(struct kvm *kvm) } } -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); bool map_is_active; unsigned long flags; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 4f8aecb07ae6..efbcf8f96f9c 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -237,4 +237,14 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) } } +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq); +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); + +bool vgic_supports_direct_msis(struct kvm *kvm); +int vgic_v4_init(struct kvm *kvm); +void vgic_v4_teardown(struct kvm *kvm); +int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu); +int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu); + #endif |