From d98e6346350ac909f095768beb28b82368bd126f Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Tue, 1 Jul 2008 16:23:49 -0500 Subject: KVM: Move KVM TRACE DEFINITIONS to common header Move KVM trace definitions from x86 specific kvm headers to common kvm headers to create a cross-architecture numbering scheme for trace events. This means the kvmtrace_format userspace tool won't need to know which architecture produced the log file being processed. Signed-off-by: Jerone Young Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- include/linux/kvm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 70a30651cd12..8a3ceadb1366 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -440,4 +440,25 @@ struct kvm_trace_rec { #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) + #endif -- cgit v1.2.3 From e32c8f2c0720fb21c6f4a5f6ccbebdadc878f707 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:00 +0200 Subject: KVM: kvmtrace: Remove use of bit fields in kvm trace structure This patch fixes kvmtrace use on big endian systems. When using bit fields the compiler will lay data out in the wrong order expected when laid down into a file. This fixes it by using one variable instead of using bit fields. Signed-off-by: Jerone Young Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- include/linux/kvm.h | 17 ++++++++++++++--- virt/kvm/kvm_trace.c | 17 +++++++++-------- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8a3ceadb1366..8a16b083df2e 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -311,9 +311,13 @@ struct kvm_s390_interrupt { /* This structure represents a single trace buffer record. */ struct kvm_trace_rec { - __u32 event:28; - __u32 extra_u32:3; - __u32 cycle_in:1; + /* variable rec_val + * is split into: + * bits 0 - 27 -> event id + * bits 28 -30 -> number of extra data args of size u32 + * bits 31 -> binary indicator for if tsc is in record + */ + __u32 rec_val; __u32 pid; __u32 vcpu_id; union { @@ -327,6 +331,13 @@ struct kvm_trace_rec { } u; }; +#define TRACE_REC_EVENT_ID(val) \ + (0x0fffffff & (val)) +#define TRACE_REC_NUM_DATA_ARGS(val) \ + (0x70000000 & ((val) << 28)) +#define TRACE_REC_TCS(val) \ + (0x80000000 & ((val) << 31)) + #define KVMIO 0xAE /* diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 58141f31ea8f..9acb78b3cc9f 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -54,12 +54,13 @@ static void kvm_add_trace(void *probe_private, void *call_data, struct kvm_trace *kt = kvm_trace; struct kvm_trace_rec rec; struct kvm_vcpu *vcpu; - int i, extra, size; + int i, size; + u32 extra; if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) return; - rec.event = va_arg(*args, u32); + rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); vcpu = va_arg(*args, struct kvm_vcpu *); rec.pid = current->tgid; rec.vcpu_id = vcpu->vcpu_id; @@ -67,21 +68,21 @@ static void kvm_add_trace(void *probe_private, void *call_data, extra = va_arg(*args, u32); WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.extra_u32 = extra; - rec.cycle_in = p->cycle_in; + rec.rec_val |= TRACE_REC_TCS(p->cycle_in) + | TRACE_REC_NUM_DATA_ARGS(extra); - if (rec.cycle_in) { + if (p->cycle_in) { rec.u.cycle.cycle_u64 = get_cycles(); - for (i = 0; i < rec.extra_u32; i++) + for (i = 0; i < extra; i++) rec.u.cycle.extra_u32[i] = va_arg(*args, u32); } else { - for (i = 0; i < rec.extra_u32; i++) + for (i = 0; i < extra; i++) rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); + size = calc_rec_size(p->cycle_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } -- cgit v1.2.3 From 3f7f95c65ef6a89472a28da1b9436eaeee288831 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:01 +0200 Subject: KVM: kvmtrace: replace get_cycles with ktime_get v3 The current kvmtrace code uses get_cycles() while the interpretation would be easier using using nanoseconds. ktime_get() should give at least the same accuracy as get_cycles on all architectures (even better on 32bit archs) but at a better unit (e.g. comparable between hosts with different frequencies. [avi: avoid ktime_t in public header] Signed-off-by: Christian Ehrhardt Acked-by: Christian Borntraeger Signed-off-by: Avi Kivity --- include/linux/kvm.h | 6 +++--- virt/kvm/kvm_trace.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8a16b083df2e..5d08f11bb27f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -322,12 +322,12 @@ struct kvm_trace_rec { __u32 vcpu_id; union { struct { - __u64 cycle_u64; + __u64 timestamp; __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } __attribute__((packed)) cycle; + } __attribute__((packed)) timestamp; struct { __u32 extra_u32[KVM_TRC_EXTRA_MAX]; - } nocycle; + } notimestamp; } u; }; diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 9acb78b3cc9f..41dcc845f78c 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace; struct kvm_trace_probe { const char *name; const char *format; - u32 cycle_in; + u32 timestamp_in; marker_probe_func *probe_func; }; -static inline int calc_rec_size(int cycle, int extra) +static inline int calc_rec_size(int timestamp, int extra) { int rec_size = KVM_TRC_HEAD_SIZE; rec_size += extra; - return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; + return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; } static void kvm_add_trace(void *probe_private, void *call_data, @@ -69,20 +70,20 @@ static void kvm_add_trace(void *probe_private, void *call_data, WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); - rec.rec_val |= TRACE_REC_TCS(p->cycle_in) + rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) | TRACE_REC_NUM_DATA_ARGS(extra); - if (p->cycle_in) { - rec.u.cycle.cycle_u64 = get_cycles(); + if (p->timestamp_in) { + rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); for (i = 0; i < extra; i++) - rec.u.cycle.extra_u32[i] = va_arg(*args, u32); + rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); } else { for (i = 0; i < extra; i++) - rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); + rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); } - size = calc_rec_size(p->cycle_in, extra * sizeof(u32)); + size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } -- cgit v1.2.3 From 31711f2294b38d8334efaf7dbac6da4781fd151e Mon Sep 17 00:00:00 2001 From: Jerone Young Date: Mon, 14 Jul 2008 14:00:03 +0200 Subject: KVM: ppc: adds trace points for ppc tlb activity This patch adds trace points to track powerpc TLB activities using the KVM_TRACE infrastructure. Signed-off-by: Jerone Young Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- arch/powerpc/kvm/44x_tlb.c | 15 ++++++++++++++- arch/powerpc/kvm/emulate.c | 4 ++++ include/linux/kvm.h | 3 +++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include/linux/kvm.h') diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 5a5602da5091..a207d16b9dbb 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -175,6 +176,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, vcpu->arch.msr & MSR_PR); + + KVMTRACE_5D(STLB_WRITE, vcpu, victim, + stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, + handler); } void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -204,6 +209,9 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, kvmppc_44x_shadow_release(vcpu, i); stlbe->word0 = 0; + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); } up_write(¤t->mm->mmap_sem); } @@ -217,8 +225,13 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) /* XXX Replace loop with fancy data structures. */ down_write(¤t->mm->mmap_sem); for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + kvmppc_44x_shadow_release(vcpu, i); - vcpu->arch.shadow_tlb[i].word0 = 0; + stlbe->word0 = 0; + KVMTRACE_5D(STLB_INVAL, vcpu, i, + stlbe->tid, stlbe->word0, stlbe->word1, + stlbe->word2, handler); } up_write(¤t->mm->mmap_sem); } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 8c605d0a5488..4a3e274bac12 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -170,6 +170,10 @@ static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); } + KVMTRACE_5D(GTLB_WRITE, vcpu, index, + tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, + handler); + return EMULATE_DONE; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 5d08f11bb27f..e21a5050d4d6 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -471,5 +471,8 @@ struct kvm_trace_rec { #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #endif -- cgit v1.2.3 From 3b4bd7969f7b61a1ab455bff084ee4f0a2411055 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Mon, 14 Jul 2008 14:00:04 +0200 Subject: KVM: ppc: trace powerpc instruction emulation This patch adds a trace point for the instruction emulation on embedded powerpc utilizing the KVM_TRACE interface. Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- arch/powerpc/kvm/emulate.c | 2 ++ include/linux/kvm.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4a3e274bac12..c3ed63b22210 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -769,6 +769,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; } + KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); + if (advance) vcpu->arch.pc += 4; /* Advance past emulated instruction. */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e21a5050d4d6..d29b64881447 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -474,5 +474,6 @@ struct kvm_trace_rec { #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) #endif -- cgit v1.2.3 From 4d5c5d0fe89c921336b95f5e7e4f529a9df92f53 Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Mon, 28 Jul 2008 19:26:26 +0300 Subject: KVM: pci device assignment Based on a patch from: Amit Shah This patch adds support for handling PCI devices that are assigned to the guest. The device to be assigned to the guest is registered in the host kernel and interrupt delivery is handled. If a device is already assigned, or the device driver for it is still loaded on the host, the device assignment is failed by conveying a -EBUSY reply to the userspace. Devices that share their interrupt line are not supported at the moment. By itself, this patch will not make devices work within the guest. The VT-d extension is required to enable the device to perform DMA. Another alternative is PVDMA. Signed-off-by: Amit Shah Signed-off-by: Ben-Ami Yassour Signed-off-by: Weidong Han Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 243 +++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/kvm_host.h | 16 +++ include/linux/kvm.h | 19 ++++ 3 files changed, 278 insertions(+) (limited to 'include/linux/kvm.h') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 94a216562f10..a97157cc42ae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4,10 +4,14 @@ * derived from drivers/kvm/kvm_main.c * * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 * * Authors: * Avi Kivity * Yaniv Kamay + * Amit Shah + * Ben-Ami Yassour * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -23,8 +27,10 @@ #include "x86.h" #include +#include #include #include +#include #include #include #include @@ -98,6 +104,219 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + + /* This is taken to safely inject irq inside the guest. When + * the interrupt injection (or the ioapic code) uses a + * finer-grained lock, update this + */ + mutex_lock(&assigned_dev->kvm->lock); + kvm_set_irq(assigned_dev->kvm, + assigned_dev->guest_irq, 1); + mutex_unlock(&assigned_dev->kvm->lock); + kvm_put_kvm(assigned_dev->kvm); +} + +/* FIXME: Implement the OR logic needed to make shared interrupts on + * this line behave properly + */ +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + kvm_set_irq(dev->kvm, dev->guest_irq, 0); + enable_irq(dev->host_irq); +} + +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + if (match->irq_requested) { + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + mutex_unlock(&kvm->lock); + return 0; + } + + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + if (irqchip_in_kernel(kvm)) { + if (assigned_irq->host_irq) + match->host_irq = assigned_irq->host_irq; + else + match->host_irq = match->dev->irq; + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_device", (void *)match)) { + printk(KERN_INFO "%s: couldn't allocate irq for pv " + "device\n", __func__); + r = -EIO; + goto out; + } + } + + match->irq_requested = true; +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EINVAL; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->dev = dev; + + match->kvm = kvm; + + list_add(&match->list, &kvm->arch.assigned_dev_head); + +out: + mutex_unlock(&kvm->lock); + return r; +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + return r; +} + +static void kvm_free_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) { + free_irq(assigned_dev->host_irq, + (void *)assigned_dev); + + kvm_unregister_irq_ack_notifier(kvm, + &assigned_dev-> + ack_notifier); + } + + if (cancel_work_sync(&assigned_dev->interrupt_work)) + /* We had pending work. That means we will have to take + * care of kvm_put_kvm. + */ + kvm_put_kvm(kvm); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); + } +} unsigned long segment_base(u16 selector) { @@ -1766,6 +1985,28 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } case KVM_GET_PIT: { struct kvm_pit_state ps; r = -EFAULT; @@ -3945,6 +4186,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); return kvm; } @@ -3977,6 +4219,7 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_free_assigned_devices(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d451928fc841..99dddfcecf60 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -327,6 +327,21 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct work_struct interrupt_work; + struct list_head list; + struct kvm_assigned_pci_dev assigned_dev; + int assigned_dev_id; + int host_busnr; + int host_devfn; + int host_irq; + int guest_irq; + int irq_requested; + struct pci_dev *dev; + struct kvm *kvm; +}; + struct kvm_arch{ int naliases; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; @@ -339,6 +354,7 @@ struct kvm_arch{ * Hash table of struct kvm_mmu_page. */ struct list_head active_mmu_pages; + struct list_head assigned_dev_head; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index d29b64881447..ef4bc6f89778 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,6 +383,7 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#define KVM_CAP_DEVICE_ASSIGNMENT 17 /* * ioctls for VM fds @@ -412,6 +413,10 @@ struct kvm_trace_rec { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ + struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -476,4 +481,18 @@ struct kvm_trace_rec { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; +}; + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; +}; + #endif -- cgit v1.2.3 From 62c476c7c7f25a5b245b9902a935636e6316e58c Mon Sep 17 00:00:00 2001 From: Ben-Ami Yassour Date: Sun, 14 Sep 2008 03:48:28 +0300 Subject: KVM: Device Assignment with VT-d Based on a patch by: Kay, Allen M This patch enables PCI device assignment based on VT-d support. When a device is assigned to the guest, the guest memory is pinned and the mapping is updated in the VT-d IOMMU. [Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present and also control enable/disable from userspace] Signed-off-by: Kay, Allen M Signed-off-by: Weidong Han Signed-off-by: Ben-Ami Yassour Signed-off-by: Amit Shah Acked-by: Mark Gross Signed-off-by: Avi Kivity --- arch/x86/kvm/Makefile | 3 + arch/x86/kvm/vtd.c | 198 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 14 ++++ include/asm-x86/kvm_host.h | 23 +----- include/linux/kvm.h | 3 + include/linux/kvm_host.h | 52 ++++++++++++ virt/kvm/kvm_main.c | 9 ++- 7 files changed, 281 insertions(+), 21 deletions(-) create mode 100644 arch/x86/kvm/vtd.c (limited to 'include/linux/kvm.h') diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940bb6f40..3072b17447ab 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ i8254.o +ifeq ($(CONFIG_DMAR),y) +kvm-objs += vtd.o +endif obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c new file mode 100644 index 000000000000..667bf3fb64bf --- /dev/null +++ b/arch/x86/kvm/vtd.c @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Author: Allen M. Kay + * Author: Weidong Han + * Author: Ben-Ami Yassour + */ + +#include +#include +#include +#include +#include + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, r; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + r = -EINVAL; + for (i = 0; i < npages; i++) { + /* check if already mapped */ + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + if (pfn && !is_mmio_pfn(pfn)) + continue; + + pfn = gfn_to_pfn(kvm, gfn); + if (!is_mmio_pfn(pfn)) { + r = intel_iommu_page_mapping(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | + DMA_PTE_WRITE); + if (r) { + printk(KERN_DEBUG "kvm_iommu_map_pages:" + "iommu failed to map pfn=%lx\n", pfn); + goto unmap_pages; + } + } else { + printk(KERN_DEBUG "kvm_iommu_map_page:" + "invalid pfn=%lx\n", pfn); + goto unmap_pages; + } + gfn++; + } + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, base_gfn, i); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, r; + + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (r) + break; + } + up_read(&kvm->slots_lock); + return r; +} + +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct pci_dev *pdev = NULL; + int r; + + if (!intel_iommu_found()) { + printk(KERN_ERR "%s: intel iommu not found\n", __func__); + return -ENODEV; + } + + printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + pdev = assigned_dev->dev; + + if (pdev == NULL) { + if (kvm->arch.intel_iommu_domain) { + intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); + kvm->arch.intel_iommu_domain = NULL; + } + return -ENODEV; + } + + kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); + if (!kvm->arch.intel_iommu_domain) + return -ENODEV; + + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + + intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, + pdev->bus->number, pdev->devfn); + + r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, + pdev); + if (r) { + printk(KERN_ERR "Domain context map for %s failed", + pci_name(pdev)); + goto out_unmap; + } + return 0; + +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + int i; + + for (i = 0; i < npages; i++) { + pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, + gfn_to_gpa(gfn)); + kvm_release_pfn_clean(pfn); + gfn++; + } +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int i; + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + } + up_read(&kvm->slots_lock); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct kvm_assigned_dev_kernel *entry; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { + printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", + entry->host_busnr, + PCI_SLOT(entry->host_devfn), + PCI_FUNC(entry->host_devfn)); + + /* detach kvm dmar domain */ + intel_iommu_detach_dev(domain, entry->host_busnr, + entry->host_devfn); + } + kvm_iommu_unmap_memslots(kvm); + intel_iommu_domain_exit(domain); + return 0; +} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2134f3e0a516..c8a2793626ec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -277,9 +278,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + r = kvm_iommu_map_guest(kvm, match); + if (r) + goto out_list_del; + } + out: mutex_unlock(&kvm->lock); return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); out_disable: pci_disable_device(dev); out_put: @@ -1147,6 +1157,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PV_MMU: r = !tdp_enabled; break; + case KVM_CAP_IOMMU: + r = intel_iommu_found(); + break; default: r = 0; break; @@ -4282,6 +4295,7 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_iommu_unmap_guest(kvm); kvm_free_assigned_devices(kvm); kvm_free_pit(kvm); kfree(kvm->arch.vpic); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 68a3ac13afce..805629c0f15f 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -331,26 +331,6 @@ struct kvm_mem_alias { gfn_t target_gfn; }; -struct kvm_irq_ack_notifier { - struct hlist_node link; - unsigned gsi; - void (*irq_acked)(struct kvm_irq_ack_notifier *kian); -}; - -struct kvm_assigned_dev_kernel { - struct kvm_irq_ack_notifier ack_notifier; - struct work_struct interrupt_work; - struct list_head list; - int assigned_dev_id; - int host_busnr; - int host_devfn; - int host_irq; - int guest_irq; - int irq_requested; - struct pci_dev *dev; - struct kvm *kvm; -}; - struct kvm_arch{ int naliases; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; @@ -364,6 +344,7 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; + struct dmar_domain *intel_iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; @@ -514,6 +495,8 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); +int is_mmio_pfn(pfn_t pfn); + extern bool tdp_enabled; enum emulation_result { diff --git a/include/linux/kvm.h b/include/linux/kvm.h index ef4bc6f89778..4269be171faf 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -384,6 +384,7 @@ struct kvm_trace_rec { #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#define KVM_CAP_IOMMU 18 /* * ioctls for VM fds @@ -495,4 +496,6 @@ struct kvm_assigned_irq { __u32 flags; }; +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4b036430ea23..6252802c3cc0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -286,6 +286,53 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +struct kvm_irq_ack_notifier { + struct hlist_node link; + unsigned gsi; + void (*irq_acked)(struct kvm_irq_ack_notifier *kian); +}; + +struct kvm_assigned_dev_kernel { + struct kvm_irq_ack_notifier ack_notifier; + struct work_struct interrupt_work; + struct list_head list; + int assigned_dev_id; + int host_busnr; + int host_devfn; + int host_irq; + int guest_irq; + int irq_requested; + struct pci_dev *dev; + struct kvm *kvm; +}; + +#ifdef CONFIG_DMAR +int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, + unsigned long npages); +int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_unmap_guest(struct kvm *kvm); +#else /* CONFIG_DMAR */ +static inline int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, + unsigned long npages) +{ + return 0; +} + +static inline int kvm_iommu_map_guest(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + return -ENODEV; +} + +static inline int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + return 0; +} +#endif /* CONFIG_DMAR */ + static inline void kvm_guest_enter(void) { account_system_vtime(current); @@ -308,6 +355,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn << PAGE_SHIFT; } +static inline hpa_t pfn_to_hpa(pfn_t pfn) +{ + return (hpa_t)pfn << PAGE_SHIFT; +} + static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) { set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 63e661be040a..f42d5c2a396d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,7 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -static inline int is_mmio_pfn(pfn_t pfn) +inline int is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) return PageReserved(pfn_to_page(pfn)); @@ -578,6 +579,12 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(&old, &new); + + /* map the pages in iommu page table */ + r = kvm_iommu_map_pages(kvm, base_gfn, npages); + if (r) + goto out; + return 0; out_free: -- cgit v1.2.3 From 8a98f6648a2b0756d8f26d6c13332f5526355fec Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Mon, 6 Oct 2008 13:47:38 +0800 Subject: KVM: Move device assignment logic to common code To share with other archs, this patch moves device assignment logic to common parts. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 255 -------------------------------------------- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 269 insertions(+), 257 deletions(-) (limited to 'include/linux/kvm.h') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d6d7123d2644..f8bde01ba8e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ - struct kvm_assigned_dev_kernel *assigned_dev; - - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); - - /* This is taken to safely inject irq inside the guest. When - * the interrupt injection (or the ioapic code) uses a - * finer-grained lock, update this - */ - mutex_lock(&assigned_dev->kvm->lock); - kvm_set_irq(assigned_dev->kvm, - assigned_dev->guest_irq, 1); - mutex_unlock(&assigned_dev->kvm->lock); - kvm_put_kvm(assigned_dev->kvm); -} - -/* FIXME: Implement the OR logic needed to make shared interrupts on - * this line behave properly - */ -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - kvm_get_kvm(assigned_dev->kvm); - schedule_work(&assigned_dev->interrupt_work); - disable_irq_nosync(irq); - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - kvm_set_irq(dev->kvm, dev->guest_irq, 0); - enable_irq(dev->host_irq); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - - if (cancel_work_sync(&assigned_dev->interrupt_work)) - /* We had pending work. That means we will have to take - * care of kvm_put_kvm. - */ - kvm_put_kvm(kvm); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -static void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) { - mutex_unlock(&kvm->lock); - return -EINVAL; - } - - if (match->irq_requested) { - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - mutex_unlock(&kvm->lock); - return 0; - } - - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - - if (irqchip_in_kernel(kvm)) { - if (!capable(CAP_SYS_RAWIO)) { - r = -EPERM; - goto out_release; - } - - if (assigned_irq->host_irq) - match->host_irq = assigned_irq->host_irq; - else - match->host_irq = match->dev->irq; - match->guest_irq = assigned_irq->guest_irq; - match->ack_notifier.gsi = assigned_irq->guest_irq; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); - - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_device", (void *)match)) { - r = -EIO; - goto out_release; - } - } - - match->irq_requested = true; - mutex_unlock(&kvm->lock); - return r; -out_release: - mutex_unlock(&kvm->lock); - kvm_free_assigned_device(kvm, match); - return r; -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EINVAL; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_bus_and_slot(assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->dev = dev; - - match->kvm = kvm; - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - r = kvm_iommu_map_guest(kvm, match); - if (r) - goto out_list_del; - } - -out: - mutex_unlock(&kvm->lock); - return r; -out_list_del: - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - mutex_unlock(&kvm->lock); - return r; -} - unsigned long segment_base(u16 selector) { struct descriptor_table gdt; @@ -2030,28 +1797,6 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; break; } - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } case KVM_GET_PIT: { r = -EFAULT; if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4269be171faf..9acf34a6dfbb 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,7 +383,9 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#ifdef CONFIG_X86 #define KVM_CAP_DEVICE_ASSIGNMENT 17 +#endif #define KVM_CAP_IOMMU 18 /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 73b7c52b9493..10c1146cd009 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm); struct kvm *kvm_arch_create_vm(void); void kvm_arch_destroy_vm(struct kvm *kvm); +void kvm_free_all_assigned_devices(struct kvm *kvm); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 98cd916448a8..485bcdc16552 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,12 @@ #include "coalesced_mmio.h" #endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +#include +#include +#include "irq.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -71,6 +77,240 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, bool kvm_rebooting; +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + + /* This is taken to safely inject irq inside the guest. When + * the interrupt injection (or the ioapic code) uses a + * finer-grained lock, update this + */ + mutex_lock(&assigned_dev->kvm->lock); + kvm_set_irq(assigned_dev->kvm, + assigned_dev->guest_irq, 1); + mutex_unlock(&assigned_dev->kvm->lock); + kvm_put_kvm(assigned_dev->kvm); +} + +/* FIXME: Implement the OR logic needed to make shared interrupts on + * this line behave properly + */ +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + kvm_set_irq(dev->kvm, dev->guest_irq, 0); + enable_irq(dev->host_irq); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + + if (cancel_work_sync(&assigned_dev->interrupt_work)) + /* We had pending work. That means we will have to take + * care of kvm_put_kvm. + */ + kvm_put_kvm(kvm); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + + if (match->irq_requested) { + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + mutex_unlock(&kvm->lock); + return 0; + } + + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + if (irqchip_in_kernel(kvm)) { + if (!capable(CAP_SYS_RAWIO)) { + r = -EPERM; + goto out_release; + } + + if (assigned_irq->host_irq) + match->host_irq = assigned_irq->host_irq; + else + match->host_irq = match->dev->irq; + match->guest_irq = assigned_irq->guest_irq; + match->ack_notifier.gsi = assigned_irq->guest_irq; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); + + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_device", (void *)match)) { + r = -EIO; + goto out_release; + } + } + + match->irq_requested = true; + mutex_unlock(&kvm->lock); + return r; +out_release: + mutex_unlock(&kvm->lock); + kvm_free_assigned_device(kvm, match); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EINVAL; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->dev = dev; + + match->kvm = kvm; + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + r = kvm_iommu_map_guest(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + return r; +} +#endif + static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); @@ -578,12 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm, } kvm_free_physmem_slot(&old, &new); - +#ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); if (r) goto out; - +#endif return 0; out_free: @@ -1382,6 +1622,30 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } +#endif +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } #endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); -- cgit v1.2.3 From 2381ad241d0bea1253a37f314b270848067640bb Mon Sep 17 00:00:00 2001 From: Xiantao Zhang Date: Wed, 8 Oct 2008 08:29:33 +0800 Subject: KVM: ia64: Add intel iommu support for guests. With intel iommu hardware, we can assign devices to kvm/ia64 guests. Signed-off-by: Xiantao Zhang Signed-off-by: Avi Kivity --- arch/ia64/kvm/Makefile | 4 ++++ arch/ia64/kvm/kvm-ia64.c | 9 +++++++++ include/linux/kvm.h | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux/kvm.h') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 3b1a1c156ef9..cf37f8f490c0 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -46,6 +46,10 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) +ifeq ($(CONFIG_DMAR),y) +common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +endif + kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 3df82f3fe547..c0699f0e35a9 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -187,6 +188,9 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_IOMMU: + r = intel_iommu_found(); + break; default: r = 0; } @@ -773,6 +777,7 @@ static void kvm_init_vm(struct kvm *kvm) */ kvm_build_io_pmt(kvm); + INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); } struct kvm *kvm_arch_create_vm(void) @@ -1336,6 +1341,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + kvm_iommu_unmap_guest(kvm); +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + kvm_free_all_assigned_devices(kvm); +#endif kfree(kvm->arch.vioapic); kvm_release_vm_pages(kvm); kvm_free_physmem(kvm); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 9acf34a6dfbb..797fcd781242 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -383,7 +383,7 @@ struct kvm_trace_rec { #define KVM_CAP_MP_STATE 14 #define KVM_CAP_COALESCED_MMIO 15 #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ -#ifdef CONFIG_X86 +#if defined(CONFIG_X86)||defined(CONFIG_IA64) #define KVM_CAP_DEVICE_ASSIGNMENT 17 #endif #define KVM_CAP_IOMMU 18 -- cgit v1.2.3