From 42c4e0c77ac91505ab94284b14025e3a0865c0a5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 99c25338ede8..e32e776f20c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From cbbc58d4fdfab1a39a6ac1b41fcb17885952157a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:01 +0530 Subject: kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine This moves the kvmppc_ops callbacks to be a per VM entity. This enables us to select HV and PR mode when creating a VM. We also allow both kvm-hv and kvm-pr kernel module to be loaded. To achieve this we move /dev/kvm ownership to kvm.ko module. Depending on which KVM mode we select during VM creation we take a reference count on respective module Signed-off-by: Aneesh Kumar K.V [agraf: fix coding style] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 7 +-- arch/powerpc/kvm/44x.c | 7 ++- arch/powerpc/kvm/book3s.c | 89 +++++++++++++++++++++++++------------ arch/powerpc/kvm/book3s.h | 2 + arch/powerpc/kvm/book3s_hv.c | 18 ++++---- arch/powerpc/kvm/book3s_pr.c | 25 +++++++---- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/booke.c | 22 ++++----- arch/powerpc/kvm/e500.c | 8 +++- arch/powerpc/kvm/e500mc.c | 6 ++- arch/powerpc/kvm/emulate.c | 11 ++--- arch/powerpc/kvm/powerpc.c | 76 ++++++++++++++++++++++--------- include/uapi/linux/kvm.h | 4 ++ 14 files changed, 187 insertions(+), 91 deletions(-) (limited to 'include/uapi') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 61ce4dca45d3..237d1d25b448 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -270,6 +270,7 @@ struct kvm_arch { #ifdef CONFIG_KVM_XICS struct kvmppc_xics *xics; #endif + struct kvmppc_ops *kvm_ops; }; /* diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 20f461637090..3069cf4dcc88 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -182,6 +182,7 @@ union kvmppc_one_reg { }; struct kvmppc_ops { + struct module *owner; bool is_hv_enabled; int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); @@ -217,7 +218,6 @@ struct kvmppc_ops { unsigned long npages); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); - int (*check_processor_compat)(void); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); @@ -229,7 +229,8 @@ struct kvmppc_ops { }; -extern struct kvmppc_ops *kvmppc_ops; +extern struct kvmppc_ops *kvmppc_hv_ops; +extern struct kvmppc_ops *kvmppc_pr_ops; /* * Cuts out inst bits with ordering according to spec. @@ -326,7 +327,7 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) { - kvmppc_ops->fast_vcpu_kick(vcpu); + vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); } #else diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index a765bcd74fbb..93221e87b911 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -213,16 +213,19 @@ static int __init kvmppc_44x_init(void) if (r) goto err_out; - r = kvm_init(&kvm_ops_44x, sizeof(struct kvmppc_vcpu_44x), - 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); if (r) goto err_out; + kvm_ops_44x.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_44x; + err_out: return r; } static void __exit kvmppc_44x_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 130fe1d75bac..ad8f6ed3f136 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -34,6 +34,7 @@ #include #include +#include "book3s.h" #include "trace.h" #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -71,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { - if (!kvmppc_ops->is_hv_enabled) + if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled) return to_book3s(vcpu)->hior; return 0; } @@ -79,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { - if (kvmppc_ops->is_hv_enabled) + if (vcpu->kvm->arch.kvm_ops->is_hv_enabled) return; if (pending_now) vcpu->arch.shared->int_pending = 1; @@ -93,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) ulong crit_r1; bool crit; - if (kvmppc_ops->is_hv_enabled) + if (vcpu->kvm->arch.kvm_ops->is_hv_enabled) return false; crit_raw = vcpu->arch.shared->critical; @@ -477,13 +478,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return kvmppc_ops->get_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return kvmppc_ops->set_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -562,7 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (size > sizeof(val)) return -EINVAL; - r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -641,7 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) return -EFAULT; - r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -702,23 +703,23 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvmppc_ops->vcpu_load(vcpu, cpu); + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_put(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { - kvmppc_ops->set_msr(vcpu, msr); + vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); } EXPORT_SYMBOL_GPL(kvmppc_set_msr); int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - return kvmppc_ops->vcpu_run(kvm_run, vcpu); + return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, @@ -743,84 +744,84 @@ void kvmppc_decrementer_func(unsigned long data) struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { - return kvmppc_ops->vcpu_create(kvm, id); + return kvm->arch.kvm_ops->vcpu_create(kvm, id); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_free(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); } int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { - return kvmppc_ops->check_requests(vcpu); + return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); } int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - return kvmppc_ops->get_dirty_log(kvm, log); + return kvm->arch.kvm_ops->get_dirty_log(kvm, log); } void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_ops->free_memslot(free, dont); + kvm->arch.kvm_ops->free_memslot(free, dont); } int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - return kvmppc_ops->create_memslot(slot, npages); + return kvm->arch.kvm_ops->create_memslot(slot, npages); } void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { - kvmppc_ops->flush_memslot(kvm, memslot); + kvm->arch.kvm_ops->flush_memslot(kvm, memslot); } int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - return kvmppc_ops->prepare_memory_region(kvm, memslot, mem); + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); } void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { - kvmppc_ops->commit_memory_region(kvm, mem, old); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->unmap_hva(kvm, hva); + return kvm->arch.kvm_ops->unmap_hva(kvm, hva); } EXPORT_SYMBOL_GPL(kvm_unmap_hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvmppc_ops->unmap_hva_range(kvm, start, end); + return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, hva); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->test_age_hva(kvm, hva); + return kvm->arch.kvm_ops->test_age_hva(kvm, hva); } void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { - kvmppc_ops->set_spte_hva(kvm, hva, pte); + kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); } void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_ops->mmu_destroy(vcpu); + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); } int kvmppc_core_init_vm(struct kvm *kvm) @@ -831,12 +832,12 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif - return kvmppc_ops->init_vm(kvm); + return kvm->arch.kvm_ops->init_vm(kvm); } void kvmppc_core_destroy_vm(struct kvm *kvm) { - kvmppc_ops->destroy_vm(kvm); + kvm->arch.kvm_ops->destroy_vm(kvm); #ifdef CONFIG_PPC64 kvmppc_rtas_tokens_free(kvm); @@ -846,5 +847,35 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) int kvmppc_core_check_processor_compat(void) { - return kvmppc_ops->check_processor_compat(); + /* + * We always return 0 for book3s. We check + * for compatability while loading the HV + * or PR module + */ + return 0; +} + +static int kvmppc_book3s_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (r) + return r; +#ifdef CONFIG_KVM_BOOK3S_32 + r = kvmppc_book3s_init_pr(); +#endif + return r; + +} + +static void kvmppc_book3s_exit(void) +{ +#ifdef CONFIG_KVM_BOOK3S_32 + kvmppc_book3s_exit_pr(); +#endif + kvm_exit(); } + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 9e5b3a341943..4bf956cf94d6 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -28,5 +28,7 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); +extern int kvmppc_book3s_init_pr(void); +extern void kvmppc_book3s_exit_pr(void); #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9e954a81c078..8743048881b7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2159,7 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } -static struct kvmppc_ops kvmppc_hv_ops = { +static struct kvmppc_ops kvm_ops_hv = { .is_hv_enabled = true, .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2186,7 +2186,6 @@ static struct kvmppc_ops kvmppc_hv_ops = { .create_memslot = kvmppc_core_create_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, - .check_processor_compat = kvmppc_core_check_processor_compat_hv, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, .emulate_op = kvmppc_core_emulate_op_hv, .emulate_mtspr = kvmppc_core_emulate_mtspr_hv, @@ -2198,20 +2197,23 @@ static struct kvmppc_ops kvmppc_hv_ops = { static int kvmppc_book3s_init_hv(void) { int r; - - r = kvm_init(&kvmppc_hv_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - - if (r) + /* + * FIXME!! Do we need to check on all cpus ? + */ + r = kvmppc_core_check_processor_compat_hv(); + if (r < 0) return r; - r = kvmppc_mmu_hv_init(); + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + r = kvmppc_mmu_hv_init(); return r; } static void kvmppc_book3s_exit_hv(void) { - kvm_exit(); + kvmppc_hv_ops = NULL; } module_init(kvmppc_book3s_init_hv); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7f583a482161..fbd985f0cb02 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1525,7 +1525,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp, return -ENOTTY; } -static struct kvmppc_ops kvmppc_pr_ops = { +static struct kvmppc_ops kvm_ops_pr = { .is_hv_enabled = false, .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, @@ -1552,7 +1552,6 @@ static struct kvmppc_ops kvmppc_pr_ops = { .create_memslot = kvmppc_core_create_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, - .check_processor_compat = kvmppc_core_check_processor_compat_pr, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, .emulate_op = kvmppc_core_emulate_op_pr, .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, @@ -1561,27 +1560,35 @@ static struct kvmppc_ops kvmppc_pr_ops = { .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, }; -static int kvmppc_book3s_init_pr(void) + +int kvmppc_book3s_init_pr(void) { int r; - r = kvm_init(&kvmppc_pr_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - - if (r) + r = kvmppc_core_check_processor_compat_pr(); + if (r < 0) return r; - r = kvmppc_mmu_hpte_sysinit(); + kvm_ops_pr.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_pr; + r = kvmppc_mmu_hpte_sysinit(); return r; } -static void kvmppc_book3s_exit_pr(void) +void kvmppc_book3s_exit_pr(void) { + kvmppc_pr_ops = NULL; kvmppc_mmu_hpte_sysexit(); - kvm_exit(); } +/* + * We only support separate modules for book3s 64 + */ +#ifdef CONFIG_PPC_BOOK3S_64 + module_init(kvmppc_book3s_init_pr); module_exit(kvmppc_book3s_exit_pr); MODULE_LICENSE("GPL"); +#endif diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index c3c832b27ee5..f7a5108a3483 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) } /* Check for real mode returning too hard */ - if (xics->real_mode && kvmppc_ops->is_hv_enabled) + if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled) return kvmppc_xics_rm_complete(vcpu, req); switch (req) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index cb2d986a3382..15d0149511eb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1472,7 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, get_sregs_base(vcpu, sregs); get_sregs_arch206(vcpu, sregs); - return kvmppc_ops->get_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, @@ -1491,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (ret < 0) return ret; - return kvmppc_ops->set_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) @@ -1548,7 +1548,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vrsave); break; default: - r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); break; } @@ -1631,7 +1631,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vrsave = set_reg_val(reg->id, val); break; default: - r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); break; } @@ -1911,37 +1911,37 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_ops->mmu_destroy(vcpu); + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); } int kvmppc_core_init_vm(struct kvm *kvm) { - return kvmppc_ops->init_vm(kvm); + return kvm->arch.kvm_ops->init_vm(kvm); } struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { - return kvmppc_ops->vcpu_create(kvm, id); + return kvm->arch.kvm_ops->vcpu_create(kvm, id); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_free(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); } void kvmppc_core_destroy_vm(struct kvm *kvm) { - kvmppc_ops->destroy_vm(kvm); + kvm->arch.kvm_ops->destroy_vm(kvm); } void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvmppc_ops->vcpu_load(vcpu, cpu); + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_put(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index d225d5ebddcc..497b142f651c 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -555,13 +555,19 @@ static int __init kvmppc_e500_init(void) flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + ivor[max_ivor] + handler_len); - r = kvm_init(&kvm_ops_e500, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + if (r) + goto err_out; + kvm_ops_e500.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500; + err_out: return r; } static void __exit kvmppc_e500_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index db6a383401c7..4132cd2fc171 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -373,15 +373,19 @@ static int __init kvmppc_e500mc_init(void) kvmppc_init_lpid(64); kvmppc_claim_lpid(0); /* host */ - r = kvm_init(&kvm_ops_e500mc, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); if (r) goto err_out; + kvm_ops_e500mc.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500mc; + err_out: return r; } static void __exit kvmppc_e500mc_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index de9a340d22ed..2f9a0873b44f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_PIR: break; default: - emulated = kvmppc_ops->emulate_mtspr(vcpu, sprn, - spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, + spr_val); if (emulated == EMULATE_FAIL) printk(KERN_INFO "mtspr: unknown spr " "0x%x\n", sprn); @@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) spr_val = kvmppc_get_dec(vcpu, get_tb()); break; default: - emulated = kvmppc_ops->emulate_mfspr(vcpu, sprn, - &spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, + &spr_val); if (unlikely(emulated == EMULATE_FAIL)) { printk(KERN_INFO "mfspr: unknown spr " "0x%x\n", sprn); @@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } if (emulated == EMULATE_FAIL) { - emulated = kvmppc_ops->emulate_op(run, vcpu, inst, &advance); + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, + &advance); if (emulated == EMULATE_AGAIN) { advance = 0; } else if (emulated == EMULATE_FAIL) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b103d747934a..0320c1721caa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,11 @@ #define CREATE_TRACE_POINTS #include "trace.h" -struct kvmppc_ops *kvmppc_ops; +struct kvmppc_ops *kvmppc_hv_ops; +EXPORT_SYMBOL_GPL(kvmppc_hv_ops); +struct kvmppc_ops *kvmppc_pr_ops; +EXPORT_SYMBOL_GPL(kvmppc_pr_ops); + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { @@ -195,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) goto out; /* HV KVM can only do PAPR mode for now */ - if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled) + if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled) goto out; #ifdef CONFIG_KVM_BOOKE_HV @@ -271,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - if (type) - return -EINVAL; - + struct kvmppc_ops *kvm_ops = NULL; + /* + * if we have both HV and PR enabled, default is HV + */ + if (type == 0) { + if (kvmppc_hv_ops) + kvm_ops = kvmppc_hv_ops; + else + kvm_ops = kvmppc_pr_ops; + if (!kvm_ops) + goto err_out; + } else if (type == KVM_VM_PPC_HV) { + if (!kvmppc_hv_ops) + goto err_out; + kvm_ops = kvmppc_hv_ops; + } else if (type == KVM_VM_PPC_PR) { + if (!kvmppc_pr_ops) + goto err_out; + kvm_ops = kvmppc_pr_ops; + } else + goto err_out; + + if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + return -ENOENT; + + kvm->arch.kvm_ops = kvm_ops; return kvmppc_core_init_vm(kvm); +err_out: + return -EINVAL; } void kvm_arch_destroy_vm(struct kvm *kvm) @@ -294,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvmppc_core_destroy_vm(kvm); mutex_unlock(&kvm->lock); + + /* drop the module reference */ + module_put(kvm->arch.kvm_ops->owner); } void kvm_arch_sync_events(struct kvm *kvm) @@ -303,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_dev_ioctl_check_extension(long ext) { int r; + /* FIXME!! + * Should some of this be vm ioctl ? is it possible now ? + */ + int hv_enabled = kvmppc_hv_ops ? 1 : 0; switch (ext) { #ifdef CONFIG_BOOKE @@ -329,7 +366,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SW_TLB: #endif /* We support this only for PR */ - r = !kvmppc_ops->is_hv_enabled; + r = !hv_enabled; break; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: @@ -354,13 +391,13 @@ int kvm_dev_ioctl_check_extension(long ext) #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = threads_per_core; else r = 0; break; case KVM_CAP_PPC_RMA: - r = kvmppc_ops->is_hv_enabled; + r = hv_enabled; /* PPC970 requires an RMA */ if (r && cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; @@ -368,7 +405,7 @@ int kvm_dev_ioctl_check_extension(long ext) #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; else r = 0; @@ -380,7 +417,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: - r = kvmppc_ops->is_hv_enabled; + r = hv_enabled; break; #endif case KVM_CAP_NR_VCPUS: @@ -390,7 +427,7 @@ int kvm_dev_ioctl_check_extension(long ext) * will have secondary threads "offline"), and for other KVM * implementations just count online CPUs. */ - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = num_present_cpus(); else r = num_online_cpus(); @@ -1039,9 +1076,10 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_SMMU_INFO: { struct kvm_ppc_smmu_info info; + struct kvm *kvm = filp->private_data; memset(&info, 0, sizeof(info)); - r = kvmppc_ops->get_smmu_info(kvm, &info); + r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info); if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) r = -EFAULT; break; @@ -1052,9 +1090,10 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_rtas_define_token(kvm, argp); break; } - default: - r = kvmppc_ops->arch_vm_ioctl(filp, ioctl, arg); - + default: { + struct kvm *kvm = filp->private_data; + r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); + } #else /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; @@ -1104,15 +1143,10 @@ EXPORT_SYMBOL_GPL(kvmppc_init_lpid); int kvm_arch_init(void *opaque) { - if (kvmppc_ops) { - printk(KERN_ERR "kvm: already loaded the other module\n"); - return -EEXIST; - } - kvmppc_ops = (struct kvmppc_ops *)opaque; return 0; } void kvm_arch_exit(void) { - kvmppc_ops = NULL; + } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..5b5341f78368 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* -- cgit v1.2.3 From 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 6 +-- arch/x86/kvm/cpuid.c | 57 ++++++++++++++++++++++++++--- arch/x86/kvm/cpuid.h | 5 ++- arch/x86/kvm/x86.c | 9 +++-- include/uapi/linux/kvm.h | 2 + 6 files changed, 139 insertions(+), 17 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a89a5ee0b940..488964414f04 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) struct kvm_cpuid_entry2 { __u32 function; @@ -2684,6 +2684,77 @@ and usually define the validity of a groups of registers. (e.g. one bit }; +4.81 KVM_GET_EMULATED_CPUID + +Capability: KVM_CAP_EXT_EMUL_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 flags; + struct kvm_cpuid_entry2 entries[0]; +}; + +The member 'flags' is used for passing flags from userspace. + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are emulated by +kvm.Userspace can use the information returned by this ioctl to query +which features are emulated by kvm instead of being present natively. + +Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 +structure with the 'nent' field indicating the number of entries in +the variable-size array 'entries'. If the number of entries is too low +to describe the cpu capabilities, an error (E2BIG) is returned. If the +number is too high, the 'nent' field is adjusted and an error (ENOMEM) +is returned. If the number is just right, the 'nent' field is adjusted +to the number of valid entries in the 'entries' array, which is then +filled. + +The entries returned are the set CPUID bits of the respective features +which kvm emulates, as returned by the CPUID instruction, with unknown +or unsupported feature bits cleared. + +Features like x2apic, for example, may not be present in the host cpu +but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be +emulated efficiently and thus not included here. + +The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + + 6. Capabilities that can be enabled ----------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0a1e3b8b964d..78a4439bfdc5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -219,8 +219,14 @@ static bool supported_xcr0_bit(unsigned bit) #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -515,6 +521,15 @@ out: return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -529,8 +544,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (entries[i].padding[0] || + entries[i].padding[1] || + entries[i].padding[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -547,6 +588,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) @@ -560,7 +605,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -571,7 +616,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edf2a07df3a3..3d1e3ee8b39e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2564,6 +2564,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2673,15 +2674,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..32c60b98ddf8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -668,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/devices/vfio.txt | 22 +++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/Makefile | 2 +- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 + virt/kvm/Kconfig | 3 + virt/kvm/kvm_main.c | 5 + virt/kvm/vfio.c | 220 +++++++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Documentation/virtual/kvm/devices/vfio.txt create mode 100644 virt/kvm/vfio.c (limited to 'include/uapi') diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..7beddbd38ac7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1058,6 +1058,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 32c60b98ddf8..509cfbfe9658 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -845,6 +845,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ca014da134d..82c4047aa0e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_XICS: ops = &kvm_xics_ops; break; +#endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; #endif default: return -ENODEV; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..597c258245ea --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,220 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; -- cgit v1.2.3