diff options
author | Alexey Kardashevskiy <aik@ozlabs.ru> | 2019-03-29 16:43:26 +1100 |
---|---|---|
committer | Paul Mackerras <paulus@ozlabs.org> | 2019-04-30 14:43:13 +1000 |
commit | e1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7 (patch) | |
tree | 0a22fce388d2adee4edb79f60c9ba694756c80d5 /arch/powerpc/kvm/book3s_64_vio.c | |
parent | 2001825efcea75e4209e4956f6cd619fbc246d16 (diff) | |
download | lwn-e1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7.tar.gz lwn-e1a1ef84cd07f72ce12f139eb9a37d3f9028e7a7.zip |
KVM: PPC: Book3S: Allocate guest TCEs on demand too
We already allocate hardware TCE tables in multiple levels and skip
intermediate levels when we can, now it is a turn of the KVM TCE tables.
Thankfully these are allocated already in 2 levels.
This moves the table's last level allocation from the creating helper to
kvmppc_tce_put() and kvm_spapr_tce_fault(). Since such allocation cannot
be done in real mode, this creates a virtual mode version of
kvmppc_tce_put() which handles allocations.
This adds kvmppc_rm_ioba_validate() to do an additional test if
the consequent kvmppc_tce_put() needs a page which has not been allocated;
if this is the case, we bail out to virtual mode handlers.
The allocations are protected by a new mutex as kvm->lock is not suitable
for the task because the fault handler is called with the mmap_sem held
but kvmhv_setup_mmu() locks kvm->lock and mmap_sem in the reverse order.
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'arch/powerpc/kvm/book3s_64_vio.c')
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 72 |
1 files changed, 58 insertions, 14 deletions
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5e3b62c491f8..66270e07449a 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head) unsigned long i, npages = kvmppc_tce_pages(stt->size); for (i = 0; i < npages; i++) - __free_page(stt->pages[i]); + if (stt->pages[i]) + __free_page(stt->pages[i]); kfree(stt); } +static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt, + unsigned long sttpage) +{ + struct page *page = stt->pages[sttpage]; + + if (page) + return page; + + mutex_lock(&stt->alloc_lock); + page = stt->pages[sttpage]; + if (!page) { + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + WARN_ON_ONCE(!page); + if (page) + stt->pages[sttpage] = page; + } + mutex_unlock(&stt->alloc_lock); + + return page; +} + static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) { struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data; @@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf) if (vmf->pgoff >= kvmppc_tce_pages(stt->size)) return VM_FAULT_SIGBUS; - page = stt->pages[vmf->pgoff]; + page = kvm_spapr_get_tce_page(stt, vmf->pgoff); + if (!page) + return VM_FAULT_OOM; + get_page(page); vmf->page = page; return 0; @@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *siter; unsigned long npages, size = args->size; int ret = -ENOMEM; - int i; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) @@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->offset = args->offset; stt->size = size; stt->kvm = kvm; + mutex_init(&stt->alloc_lock); INIT_LIST_HEAD_RCU(&stt->iommu_tables); - for (i = 0; i < npages; i++) { - stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!stt->pages[i]) - goto fail; - } - mutex_lock(&kvm->lock); /* Check this LIOBN hasn't been previously allocated */ @@ -352,11 +371,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, if (ret >= 0) return ret; - fail: - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); - kfree(stt); fail_acct: kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); @@ -413,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, return H_SUCCESS; } +/* + * Handles TCE requests for emulated devices. + * Puts guest TCE values to the table and expects user space to convert them. + * Cannot fail so kvmppc_tce_validate must be called before it. + */ +static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, + unsigned long idx, unsigned long tce) +{ + struct page *page; + u64 *tbl; + unsigned long sttpage; + + idx -= stt->offset; + sttpage = idx / TCES_PER_PAGE; + page = stt->pages[sttpage]; + + if (!page) { + /* We allow any TCE, not just with read|write permissions */ + if (!tce) + return; + + page = kvm_spapr_get_tce_page(stt, sttpage); + if (!page) + return; + } + tbl = page_to_virt(page); + + tbl[idx % TCES_PER_PAGE] = tce; +} + static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, unsigned long entry) { |