KVM: Add coalesced MMIO support (common part)

This patch adds all the structures needed to coalesce MMIOs. Until an architecture uses it, it is not compiled. Coalesced MMIO introduces two ioctl()s to define where the MMIO zones that can be coalesced are located: - KVM_REGISTER_COALESCED_MMIO registers a coalesced MMIO zone. It takes one parameter (struct kvm_coalesced_mmio_zone) which defines a memory area where MMIOs can be coalesced until the next switch to user space. The maximum number of MMIO zones is KVM_COALESCED_MMIO_ZONE_MAX. - KVM_UNREGISTER_COALESCED_MMIO cancels all registered zones inside the given bounds (the bounds are also given by struct kvm_coalesced_mmio_zone). The userspace client can check kernel coalesced MMIO availability by calling ioctl(KVM_CHECK_EXTENSION) for the KVM_CAP_COALESCED_MMIO capability. The ioctl() call for KVM_CAP_COALESCED_MMIO will return 0 if not supported, or the page offset where the ring buffer is stored. The page offset depends on the architecture. After an ioctl(KVM_RUN), the first page of the KVM memory mapping points to a kvm_run structure. The offset given by KVM_CAP_COALESCED_MMIO is the offset of the coalesced MMIO ring, expressed in units of PAGE_SIZE, relative to the start address of the kvm_run structure. The MMIO ring buffer is defined by the structure kvm_coalesced_mmio_ring. [akio: fix oops during guest shutdown] Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net> Signed-off-by: Akio Takebe <takebe_akio@jp.fujitsu.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
author: Laurent Vivier <Laurent.Vivier@bull.net> 2008-05-30 16:05:54 +0200
committer: Avi Kivity <avi@qumranet.com> 2008-07-20 12:42:31 +0300
commit: 5f94c1741bdc7a336553122036e8a779e616ccbf (patch)
tree: fb0dc4edf0c346dc266eb2ee2d433cb2678a0bc4 /virt
parent: 92760499d01ef91518119908eb9b8798b6c9bd3f (diff)
download: lwn-5f94c1741bdc7a336553122036e8a779e616ccbf.tar.gz
lwn-5f94c1741bdc7a336553122036e8a779e616ccbf.zip
3 files changed, 236 insertions, 0 deletions
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
new file mode 100644
index 000000000000..5ae620d32fac
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.c
@@ -0,0 +1,156 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#include "iodev.h"
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include "coalesced_mmio.h"
+
+/*
+ * Tell the MMIO bus whether an access may be queued in the coalesced ring.
+ *
+ * Returns 1 when the access is a write, the ring still has a free entry and
+ * (addr, len) lies entirely inside one registered zone; returns 0 otherwise,
+ * in which case the access is not handled by this device.
+ */
+static int coalesced_mmio_in_range(struct kvm_io_device *this,
+				   gpa_t addr, int len, int is_write)
+{
+	struct kvm_coalesced_mmio_dev *dev =
+				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+	struct kvm_coalesced_mmio_zone *zone;
+	unsigned int last;
+	gpa_t offset;
+	int i;
+
+	/* only writes are coalesced; a negative length is never valid */
+	if (!is_write || len < 0)
+		return 0;
+
+	/* kvm->lock is taken by the caller and must not be released before
+	 * dev.read/write
+	 */
+
+	/* Are we able to batch it ? */
+
+	/* The ring page is also exposed to user space through the vcpu
+	 * mapping, so its indices cannot be trusted: refuse to batch when
+	 * the insertion index is outside the ring.
+	 */
+	last = ring->last;
+	if (last >= KVM_COALESCED_MMIO_MAX)
+		return 0;
+
+	/* last is the first free entry
+	 * check if we don't meet the first used entry
+	 * there is always one unused entry in the buffer
+	 */
+	if ((last + 1) % KVM_COALESCED_MMIO_MAX == ring->first) {
+		/* full */
+		return 0;
+	}
+
+	/* is it in a batchable area ? */
+
+	for (i = 0; i < dev->nb_zones; i++) {
+		zone = &dev->zone[i];
+
+		/* (addr,len) is fully included in
+		 * (zone->addr, zone->size)
+		 *
+		 * Compare offsets instead of end addresses so that neither
+		 * addr + len nor zone->addr + zone->size can wrap around.
+		 */
+		if (addr < zone->addr)
+			continue;
+
+		offset = addr - zone->addr;
+		if (offset <= zone->size &&
+		    (gpa_t)len <= zone->size - offset)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Queue one MMIO write in the coalesced ring.
+ *
+ * Copies (addr, len, val) into the entry designated by ring->last, then
+ * publishes it by advancing ring->last.  The caller is expected to have
+ * obtained a positive answer from coalesced_mmio_in_range() first.
+ */
+static void coalesced_mmio_write(struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *val)
+{
+	struct kvm_coalesced_mmio_dev *dev =
+				(struct kvm_coalesced_mmio_dev*)this->private;
+	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+	unsigned int insert;
+
+	/* kvm->lock must be taken by caller before call to in_range()*/
+
+	/* The ring page is also exposed to user space through the vcpu
+	 * mapping, so ring->last may change under us or hold any value.
+	 * Read it exactly once and never index outside the ring; a write
+	 * that cannot be stored safely is dropped.
+	 */
+	insert = *(volatile typeof(ring->last) *)&ring->last;
+	if (insert >= KVM_COALESCED_MMIO_MAX)
+		return;
+
+	/* copy data in first free entry of the ring */
+
+	/* NOTE(review): len is trusted to fit in the entry's data field;
+	 * confirm the bus never passes a larger access.
+	 */
+	ring->coalesced_mmio[insert].phys_addr = addr;
+	ring->coalesced_mmio[insert].len = len;
+	memcpy(ring->coalesced_mmio[insert].data, val, len);
+
+	/* the entry must be fully written before it becomes visible */
+	smp_wmb();
+	ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
+}
+
+/*
+ * Bus destructor: release the coalesced MMIO device.
+ *
+ * The allocation is the enclosing struct kvm_coalesced_mmio_dev, which
+ * kvm_coalesced_mmio_init() stores in this->private.  Free it through that
+ * pointer, as the other callbacks do to recover the device, rather than
+ * through 'this', which is only the same address while the embedded
+ * kvm_io_device stays the first member of the structure.
+ */
+static void coalesced_mmio_destructor(struct kvm_io_device *this)
+{
+	kfree(this->private);
+}
+
+/*
+ * Create the coalesced MMIO device of a VM and attach it to the MMIO bus.
+ *
+ * Returns 0 on success, or -ENOMEM when the device cannot be allocated.
+ */
+int kvm_coalesced_mmio_init(struct kvm *kvm)
+{
+	struct kvm_coalesced_mmio_dev *mmio_dev;
+	struct kvm_io_device *iodev;
+
+	mmio_dev = kzalloc(sizeof(*mmio_dev), GFP_KERNEL);
+	if (mmio_dev == NULL)
+		return -ENOMEM;
+
+	mmio_dev->kvm = kvm;
+
+	/* bus callbacks get the whole device back through ->private */
+	iodev = &mmio_dev->dev;
+	iodev->private = mmio_dev;
+	iodev->in_range = coalesced_mmio_in_range;
+	iodev->write = coalesced_mmio_write;
+	iodev->destructor = coalesced_mmio_destructor;
+
+	kvm->coalesced_mmio_dev = mmio_dev;
+	kvm_io_bus_register_dev(&kvm->mmio_bus, iodev);
+
+	return 0;
+}
+
+/*
+ * Append a zone to the list of coalesced MMIO zones of a VM.
+ *
+ * Returns 0 on success, -EINVAL when the VM has no coalesced MMIO device,
+ * or -ENOBUFS when KVM_COALESCED_MMIO_ZONE_MAX zones are already registered.
+ */
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+				         struct kvm_coalesced_mmio_zone *zone)
+{
+	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+	int ret = -ENOBUFS;
+
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	if (dev->nb_zones < KVM_COALESCED_MMIO_ZONE_MAX) {
+		/* store a copy of the caller's zone in the next free slot */
+		dev->zone[dev->nb_zones++] = *zone;
+		ret = 0;
+	}
+	mutex_unlock(&kvm->lock);
+
+	return ret;
+}
+
+/*
+ * Drop every registered zone that is entirely contained in the bounds
+ * given by (zone->addr, zone->size).
+ *
+ * Returns 0, or -EINVAL when the VM has no coalesced MMIO device.
+ */
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+					   struct kvm_coalesced_mmio_zone *zone)
+{
+	struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+	struct kvm_coalesced_mmio_zone *cur;
+	int idx;
+
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	/* Walk from the end: the entry moved into a freed slot is the
+	 * current last one, which has already been examined.
+	 */
+	for (idx = dev->nb_zones - 1; idx >= 0; idx--) {
+		cur = &dev->zone[idx];
+
+		if (cur->addr < zone->addr)
+			continue;
+		if (cur->addr + cur->size > zone->addr + zone->size)
+			continue;
+
+		/* fill the hole with the last zone and shrink the list */
+		*cur = dev->zone[--dev->nb_zones];
+	}
+
+	mutex_unlock(&kvm->lock);
+
+	return 0;
+}
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
new file mode 100644
index 000000000000..5ac0ec628461
--- /dev/null
+++ b/virt/kvm/coalesced_mmio.h
@@ -0,0 +1,23 @@
+/*
+ * KVM coalesced MMIO
+ *
+ * Copyright (c) 2008 Bull S.A.S.
+ *
+ *  Author: Laurent Vivier <Laurent.Vivier@bull.net>
+ *
+ */
+
+#define KVM_COALESCED_MMIO_ZONE_MAX 100
+
+/* Per-VM state of the coalesced MMIO device. */
+struct kvm_coalesced_mmio_dev {
+	/* I/O device registered on the VM's MMIO bus */
+	struct kvm_io_device dev;
+	/* VM owning this device; gives access to its coalesced MMIO ring */
+	struct kvm *kvm;
+	/* number of valid entries at the start of zone[] */
+	int nb_zones;
+	/* registered zones, at most KVM_COALESCED_MMIO_ZONE_MAX */
+	struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
+};
+
+/* Create the VM's coalesced MMIO device; returns 0 or -ENOMEM. */
+int kvm_coalesced_mmio_init(struct kvm *kvm);
+/* Register a zone; returns 0, -EINVAL (no device) or -ENOBUFS (list full). */
+int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
+                                       struct kvm_coalesced_mmio_zone *zone);
+/* Remove all zones inside the given bounds; returns 0 or -EINVAL (no device). */
+int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
+                                         struct kvm_coalesced_mmio_zone *zone);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9330fad2b918..7d10dfa0d388 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,10 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+#include "coalesced_mmio.h"
+#endif
+
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
@@ -185,10 +189,23 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
 static struct kvm *kvm_create_vm(void)
 {
 	struct kvm *kvm = kvm_arch_create_vm();
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	struct page *page;
+#endif
 
 	if (IS_ERR(kvm))
 		goto out;
 
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		kfree(kvm);
+		return ERR_PTR(-ENOMEM);
+	}
+	kvm->coalesced_mmio_ring =
+			(struct kvm_coalesced_mmio_ring *)page_address(page);
+#endif
+
 	kvm->mm = current->mm;
 	atomic_inc(&kvm->mm->mm_count);
 	spin_lock_init(&kvm->mmu_lock);
@@ -200,6 +217,9 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	kvm_coalesced_mmio_init(kvm);
+#endif
 out:
 	return kvm;
 }
@@ -242,6 +262,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_io_bus_destroy(&kvm->pio_bus);
 	kvm_io_bus_destroy(&kvm->mmio_bus);
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	if (kvm->coalesced_mmio_ring != NULL)
+		free_page((unsigned long)kvm->coalesced_mmio_ring);
+#endif
 	kvm_arch_destroy_vm(kvm);
 	mmdrop(mm);
 }
@@ -826,6 +850,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
+#endif
 	else
 		return VM_FAULT_SIGBUS;
 	get_page(page);
@@ -1148,6 +1176,32 @@ static long kvm_vm_ioctl(struct file *filp,
 			goto out;
 		break;
 	}
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+	case KVM_REGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_UNREGISTER_COALESCED_MMIO: {
+		struct kvm_coalesced_mmio_zone zone;
+		r = -EFAULT;
+		if (copy_from_user(&zone, argp, sizeof zone))
+			goto out;
+		r = -ENXIO;
+		r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
+#endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -1232,6 +1286,9 @@ static long kvm_dev_ioctl(struct file *filp,
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
 #endif
+#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
+		r += PAGE_SIZE;    /* coalesced mmio ring page */
+#endif
 		break;
 	case KVM_TRACE_ENABLE:
 	case KVM_TRACE_PAUSE:
author	Laurent Vivier <Laurent.Vivier@bull.net>	2008-05-30 16:05:54 +0200
committer	Avi Kivity <avi@qumranet.com>	2008-07-20 12:42:31 +0300
commit	5f94c1741bdc7a336553122036e8a779e616ccbf (patch)
tree	fb0dc4edf0c346dc266eb2ee2d433cb2678a0bc4 /virt
parent	92760499d01ef91518119908eb9b8798b6c9bd3f (diff)
download	lwn-5f94c1741bdc7a336553122036e8a779e616ccbf.tar.gz lwn-5f94c1741bdc7a336553122036e8a779e616ccbf.zip