diff options
author | Will Deacon <will@kernel.org> | 2020-09-11 14:25:13 +0100 |
---|---|---|
committer | Marc Zyngier <maz@kernel.org> | 2020-09-11 15:51:13 +0100 |
commit | 71233d05f4b5e3560f0d3d5607d01e9beff8dcbd (patch) | |
tree | 59f6a7ef8bf7a35cc48c4a8c0180e0aea25cefa5 | |
parent | 0f9d09b8e29bc8166f6584279aedc4a7a4038f68 (diff) | |
download | lwn-71233d05f4b5e3560f0d3d5607d01e9beff8dcbd.tar.gz lwn-71233d05f4b5e3560f0d3d5607d01e9beff8dcbd.zip |
KVM: arm64: Add support for creating kernel-agnostic stage-2 page tables
Introduce alloc() and free() functions to the generic page-table code
for guest stage-2 page-tables and plumb these into the existing KVM
page-table allocator. Subsequent patches will convert other operations
within the KVM allocator over to the generic code.
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Quentin Perret <qperret@google.com>
Link: https://lore.kernel.org/r/20200911132529.19844-6-will@kernel.org
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pgtable.h | 18 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 54 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 55 |
4 files changed, 102 insertions, 26 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e52c927aade5..0b7c702b2151 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -81,6 +81,7 @@ struct kvm_s2_mmu { */ pgd_t *pgd; phys_addr_t pgd_phys; + struct kvm_pgtable *pgt; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index ff5d7d27eb39..21d71395a377 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -123,6 +123,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot); /** + * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @kvm: KVM structure representing the guest virtual machine. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm); + +/** + * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); + +/** * kvm_pgtable_walk() - Walk a page-table. * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). * @addr: Input address for the start of the walk. diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 23a1006aa4ef..16b34d11e7cf 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -416,3 +416,57 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) free_page((unsigned long)pgt->pgd); pgt->pgd = NULL; } + +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) +{ + size_t pgd_sz; + u64 vtcr = kvm->arch.vtcr; + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); + if (!pgt->pgd) + return -ENOMEM; + + pgt->ia_bits = ia_bits; + pgt->start_level = start_level; + pgt->mmu = &kvm->arch.mmu; + + /* Ensure zeroed PGD pages are visible to the hardware walker */ + dsb(ishst); + return 0; +} + +static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte)) + return 0; + + put_page(virt_to_page(ptep)); + + if (kvm_pte_table(pte, level)) + free_page((unsigned long)kvm_pte_follow(pte)); + + return 0; +} + +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + struct kvm_pgtable_walker walker = { + .cb = stage2_free_walker, + .flags = KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + }; + + WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; + free_pages_exact(pgt->pgd, pgd_sz); + pgt->pgd = NULL; +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index fabd72b0c8a4..4607e9ca60a2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -668,47 +668,49 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, * @kvm: The pointer to the KVM structure * @mmu: The pointer to the s2 MMU structure * - * Allocates only the stage-2 HW PGD level table(s) of size defined by - * stage2_pgd_size(mmu->kvm). - * + * Allocates only the stage-2 HW PGD level table(s). * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) { - phys_addr_t pgd_phys; - pgd_t *pgd; - int cpu; + int cpu, err; + struct kvm_pgtable *pgt; - if (mmu->pgd != NULL) { + if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } - /* Allocate the HW PGD, making sure that each page gets its own refcount */ - pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); - if (!pgd) + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL); + if (!pgt) return -ENOMEM; - pgd_phys = virt_to_phys(pgd); - if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) - return -EINVAL; + err = kvm_pgtable_stage2_init(pgt, kvm); + if (err) + goto out_free_pgtable; mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran)); if (!mmu->last_vcpu_ran) { - free_pages_exact(pgd, stage2_pgd_size(kvm)); - return -ENOMEM; + err = -ENOMEM; + goto out_destroy_pgtable; } for_each_possible_cpu(cpu) *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; mmu->kvm = kvm; - mmu->pgd = pgd; - mmu->pgd_phys = pgd_phys; + mmu->pgt = pgt; + mmu->pgd_phys = __pa(pgt->pgd); + mmu->pgd = (void *)pgt->pgd; mmu->vmid.vmid_gen = 0; - return 0; + +out_destroy_pgtable: + kvm_pgtable_stage2_destroy(pgt); +out_free_pgtable: + kfree(pgt); + return err; } static void stage2_unmap_memslot(struct kvm *kvm, @@ -781,20 +783,21 @@ void stage2_unmap_vm(struct kvm *kvm) void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) { struct kvm *kvm = mmu->kvm; - void *pgd = NULL; + struct kvm_pgtable *pgt = NULL; spin_lock(&kvm->mmu_lock); - if (mmu->pgd) { - unmap_stage2_range(mmu, 0, kvm_phys_size(kvm)); - pgd = READ_ONCE(mmu->pgd); + pgt = mmu->pgt; + if (pgt) { mmu->pgd = NULL; + mmu->pgd_phys = 0; + mmu->pgt = NULL; + free_percpu(mmu->last_vcpu_ran); } spin_unlock(&kvm->mmu_lock); - /* Free the HW pgd, one page at a time */ - if (pgd) { - free_pages_exact(pgd, stage2_pgd_size(kvm)); - free_percpu(mmu->last_vcpu_ran); + if (pgt) { + kvm_pgtable_stage2_destroy(pgt); + kfree(pgt); } } |