diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 14:47:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-05 14:47:31 -0700 |
commit | 01227a889ed56ae53aeebb9f93be9d54dd8b2de8 (patch) | |
tree | d5eba9359a9827e84d4112b84d48c54df5c5acde /virt/kvm/kvm_main.c | |
parent | 9e6879460c8edb0cd3c24c09b83d06541b5af0dc (diff) | |
parent | db6ae6158186a17165ef990bda2895ae7594b039 (diff) | |
download | lwn-01227a889ed56ae53aeebb9f93be9d54dd8b2de8.tar.gz lwn-01227a889ed56ae53aeebb9f93be9d54dd8b2de8.zip |
Merge tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Gleb Natapov:
"Highlights of the updates are:
general:
- new emulated device API
- legacy device assignment is now optional
- irqfd interface is more generic and can be shared between arches
x86:
- VMCS shadow support and other nested VMX improvements
- APIC virtualization and Posted Interrupt hardware support
- Optimize mmio spte zapping
ppc:
- BookE: in-kernel MPIC emulation with irqfd support
- Book3S: in-kernel XICS emulation (incomplete)
- Book3S: HV: migration fixes
- BookE: more debug support preparation
- BookE: e6500 support
ARM:
- reworking of Hyp idmaps
s390:
- ioeventfd for virtio-ccw
And many other bug fixes, cleanups and improvements"
* tag 'kvm-3.10-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (204 commits)
kvm: Add compat_ioctl for device control API
KVM: x86: Account for failing enable_irq_window for NMI window request
KVM: PPC: Book3S: Add API for in-kernel XICS emulation
kvm/ppc/mpic: fix missing unlock in set_base_addr()
kvm/ppc: Hold srcu lock when calling kvm_io_bus_read/write
kvm/ppc/mpic: remove users
kvm/ppc/mpic: fix mmio region lists when multiple guests used
kvm/ppc/mpic: remove default routes from documentation
kvm: KVM_CAP_IOMMU only available with device assignment
ARM: KVM: iterate over all CPUs for CPU compatibility check
KVM: ARM: Fix spelling in error message
ARM: KVM: define KVM_ARM_MAX_VCPUS unconditionally
KVM: ARM: Fix API documentation for ONE_REG encoding
ARM: KVM: promote vfp_host pointer to generic host cpu context
ARM: KVM: add architecture specific hook for capabilities
ARM: KVM: perform HYP initilization for hotplugged CPUs
ARM: KVM: switch to a dual-step HYP init code
ARM: KVM: rework HYP page table freeing
ARM: KVM: enforce maximum size for identity mapped code
ARM: KVM: move to a KVM provided HYP idmap
...
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 258 |
1 files changed, 211 insertions, 47 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f18013f09e68..45f09362ee7b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -217,9 +217,9 @@ void kvm_make_mclock_inprogress_request(struct kvm *kvm) make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); } -void kvm_make_update_eoibitmap_request(struct kvm *kvm) +void kvm_make_scan_ioapic_request(struct kvm *kvm) { - make_all_cpus_request(kvm, KVM_REQ_EOIBITMAP); + make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); } int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) @@ -244,6 +244,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); + vcpu->preempted = false; r = kvm_arch_vcpu_init(vcpu); if (r < 0) @@ -503,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); r = kvm_init_mmu_notifier(kvm); if (r) @@ -580,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) kfree(kvm->memslots); } +static void kvm_destroy_devices(struct kvm *kvm) +{ + struct list_head *node, *tmp; + + list_for_each_safe(node, tmp, &kvm->devices) { + struct kvm_device *dev = + list_entry(node, struct kvm_device, vm_node); + + list_del(node); + dev->ops->destroy(dev); + } +} + static void kvm_destroy_vm(struct kvm *kvm) { int i; @@ -599,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); + kvm_destroy_devices(kvm); kvm_free_physmem(kvm); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -719,24 +735,6 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, } /* - * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: - * - create a new memory slot - * - delete an existing memory slot - * - modify an existing memory slot - * -- move it in the guest physical memory space - * -- just change its flags - * - * Since flags can be changed by some of these operations, the following - * differentiation is the best we can do for __kvm_set_memory_region(): - */ -enum kvm_mr_change { - KVM_MR_CREATE, - KVM_MR_DELETE, - KVM_MR_MOVE, - KVM_MR_FLAGS_ONLY, -}; - -/* * Allocate some memory and give it an address in the guest physical address * space. * @@ -745,8 +743,7 @@ enum kvm_mr_change { * Must be called holding mmap_sem for write. */ int __kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; gfn_t base_gfn; @@ -767,7 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if (user_alloc && + if ((mem->slot < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, @@ -875,7 +872,7 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = old_memslots; } - r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); + r = kvm_arch_prepare_memory_region(kvm, &new, mem, change); if (r) goto out_slots; @@ -915,7 +912,7 @@ int __kvm_set_memory_region(struct kvm *kvm, old_memslots = install_new_memslots(kvm, slots, &new); - kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); + kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(&old, &new); kfree(old_memslots); @@ -932,26 +929,23 @@ out: EXPORT_SYMBOL_GPL(__kvm_set_memory_region); int kvm_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { int r; mutex_lock(&kvm->slots_lock); - r = __kvm_set_memory_region(kvm, mem, user_alloc); + r = __kvm_set_memory_region(kvm, mem); mutex_unlock(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct - kvm_userspace_memory_region *mem, - bool user_alloc) + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; - return kvm_set_memory_region(kvm, mem, user_alloc); + return kvm_set_memory_region(kvm, mem); } int kvm_get_dirty_log(struct kvm *kvm, @@ -1099,7 +1093,7 @@ static int kvm_read_hva_atomic(void *data, void __user *hva, int len) return __copy_from_user_inatomic(data, hva, len); } -int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, +static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; @@ -1719,6 +1713,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_send_reschedule(cpu); put_cpu(); } +EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ void kvm_resched(struct kvm_vcpu *vcpu) @@ -1816,6 +1811,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; + if (!ACCESS_ONCE(vcpu->preempted)) + continue; if (vcpu == me) continue; if (waitqueue_active(&vcpu->wq)) @@ -2204,6 +2201,119 @@ out: } #endif +static int kvm_device_ioctl_attr(struct kvm_device *dev, + int (*accessor)(struct kvm_device *dev, + struct kvm_device_attr *attr), + unsigned long arg) +{ + struct kvm_device_attr attr; + + if (!accessor) + return -EPERM; + + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + return -EFAULT; + + return accessor(dev, &attr); +} + +static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_device *dev = filp->private_data; + + switch (ioctl) { + case KVM_SET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); + case KVM_GET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); + case KVM_HAS_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + default: + if (dev->ops->ioctl) + return dev->ops->ioctl(dev, ioctl, arg); + + return -ENOTTY; + } +} + +static int kvm_device_release(struct inode *inode, struct file *filp) +{ + struct kvm_device *dev = filp->private_data; + struct kvm *kvm = dev->kvm; + + kvm_put_kvm(kvm); + return 0; +} + +static const struct file_operations kvm_device_fops = { + .unlocked_ioctl = kvm_device_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = kvm_device_ioctl, +#endif + .release = kvm_device_release, +}; + +struct kvm_device *kvm_device_from_filp(struct file *filp) +{ + if (filp->f_op != &kvm_device_fops) + return NULL; + + return filp->private_data; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif +#ifdef CONFIG_KVM_XICS + case KVM_DEV_TYPE_XICS: + ops = &kvm_xics_ops; + break; +#endif + default: + return -ENODEV; + } + + if (test) + return 0; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->ops = ops; + dev->kvm = kvm; + + ret = ops->create(dev, cd->type); + if (ret < 0) { + kfree(dev); + return ret; + } + + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + if (ret < 0) { + ops->destroy(dev); + return ret; + } + + list_add(&dev->vm_node, &kvm->devices); + kvm_get_kvm(kvm); + cd->fd = ret; + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2225,7 +2335,7 @@ static long kvm_vm_ioctl(struct file *filp, sizeof kvm_userspace_mem)) goto out; - r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, true); + r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); break; } case KVM_GET_DIRTY_LOG: { @@ -2304,7 +2414,8 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = kvm_vm_ioctl_irq_line(kvm, &irq_event); + r = kvm_vm_ioctl_irq_line(kvm, &irq_event, + ioctl == KVM_IRQ_LINE_STATUS); if (r) goto out; @@ -2318,6 +2429,54 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ + case KVM_CREATE_DEVICE: { + struct kvm_create_device cd; + + r = -EFAULT; + if (copy_from_user(&cd, argp, sizeof(cd))) + goto out; + + r = kvm_ioctl_create_device(kvm, &cd); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &cd, sizeof(cd))) + goto out; + + r = 0; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2447,8 +2606,11 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif return 1; -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif @@ -2618,14 +2780,6 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } - -asmlinkage void kvm_spurious_fault(void) -{ - /* Fault while not rebooting. We want the trace. */ - BUG(); -} -EXPORT_SYMBOL_GPL(kvm_spurious_fault); - static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -2658,7 +2812,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) kfree(bus); } -int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) { const struct kvm_io_range *r1 = p1; const struct kvm_io_range *r2 = p2; @@ -2670,7 +2824,7 @@ int kvm_io_bus_sort_cmp(const void *p1, const void *p2) return 0; } -int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, +static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, gpa_t addr, int len) { bus->range[bus->dev_count++] = (struct kvm_io_range) { @@ -2685,7 +2839,7 @@ int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, return 0; } -int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, +static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, gpa_t addr, int len) { struct kvm_io_range *range, key; @@ -2929,6 +3083,8 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) + vcpu->preempted = false; kvm_arch_vcpu_load(vcpu, cpu); } @@ -2938,6 +3094,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (current->state == TASK_RUNNING) + vcpu->preempted = true; kvm_arch_vcpu_put(vcpu); } @@ -2947,6 +3105,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, int r; int cpu; + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -3027,6 +3188,8 @@ out_free_0a: out_free_0: kvm_arch_exit(); out_fail: + kvm_irqfd_exit(); +out_irqfd: return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -3043,6 +3206,7 @@ void kvm_exit(void) on_each_cpu(hardware_disable_nolock, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); + kvm_irqfd_exit(); free_cpumask_var(cpus_hardware_enabled); } EXPORT_SYMBOL_GPL(kvm_exit); |