summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_64_vio.c
diff options
context:
space:
mode:
authorAlexey Kardashevskiy <aik@ozlabs.ru>2019-03-29 16:43:26 +1100
committerPaul Mackerras <paulus@ozlabs.org>2019-04-30 14:43:13 +1000
commite1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7 (patch)
tree0a22fce388d2adee4edb79f60c9ba694756c80d5 /arch/powerpc/kvm/book3s_64_vio.c
parent2001825efcea75e4209e4956f6cd619fbc246d16 (diff)
downloadlwn-e1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7.tar.gz
lwn-e1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7.zip
KVM: PPC: Book3S: Allocate guest TCEs on demand too
We already allocate hardware TCE tables in multiple levels and skip intermediate levels when we can, now it is a turn of the KVM TCE tables. Thankfully these are allocated already in 2 levels. This moves the table's last level allocation from the creating helper to kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such allocation cannot be done in real mode, this creates a virtual mode version of kvmppc_tce_put() which handles allocations. This adds kvmppc_rm_ioba_validate() to do an additional test if the consequent kvmppc_tce_put() needs a page which has not been allocated; if this is the case, we bail out to virtual mode handlers. The allocations are protected by a new mutex as kvm->lock is not suitable for the task because the fault handler is called with the mmap_sem held but kvmhv_setup_mmu() locks kvm->lock and mmap_sem in the reverse order. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'arch/powerpc/kvm/book3s_64_vio.c')
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c72
1 files changed, 58 insertions, 14 deletions
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 5e3b62c491f8..66270e07449a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
- __free_page(stt->pages[i]);
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
kfree(stt);
}
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+ unsigned long sttpage)
+{
+ struct page *page = stt->pages[sttpage];
+
+ if (page)
+ return page;
+
+ mutex_lock(&stt->alloc_lock);
+ page = stt->pages[sttpage];
+ if (!page) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ WARN_ON_ONCE(!page);
+ if (page)
+ stt->pages[sttpage] = page;
+ }
+ mutex_unlock(&stt->alloc_lock);
+
+ return page;
+}
+
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
- page = stt->pages[vmf->pgoff];
+ page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+ if (!page)
+ return VM_FAULT_OOM;
+
get_page(page);
vmf->page = page;
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
- int i;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
mutex_lock(&kvm->lock);
/* Check this LIOBN hasn't been previously allocated */
@@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
return ret;
- fail:
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
kfree(stt);
fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
@@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+ unsigned long sttpage;
+
+ idx -= stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ page = stt->pages[sttpage];
+
+ if (!page) {
+ /* We allow any TCE, not just with read|write permissions */
+ if (!tce)
+ return;
+
+ page = kvm_spapr_get_tce_page(stt, sttpage);
+ if (!page)
+ return;
+ }
+ tbl = page_to_virt(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{