diff options
Diffstat (limited to 'arch/arm64/kvm/hyp/pgtable.c')
| -rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 230 |
1 files changed, 168 insertions, 62 deletions
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index df5cc74a7dd0..0c1defa5fb0f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -11,12 +11,6 @@ #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> - -#define KVM_PTE_TYPE BIT(1) -#define KVM_PTE_TYPE_BLOCK 0 -#define KVM_PTE_TYPE_PAGE 1 -#define KVM_PTE_TYPE_TABLE 1 - struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; @@ -120,11 +114,6 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level) return pte; } -static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) -{ - return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); -} - static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, const struct kvm_pgtable_visit_ctx *ctx, enum kvm_pgtable_walk_flags visit) @@ -150,7 +139,7 @@ static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, * page table walk. */ if (r == -EAGAIN) - return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT); + return walker->flags & KVM_PGTABLE_WALK_IGNORE_EAGAIN; return !r; } @@ -348,6 +337,9 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) if (!(prot & KVM_PGTABLE_PROT_R)) return -EINVAL; + if (!cpus_have_final_cap(ARM64_KVM_HVHE)) + prot &= ~KVM_PGTABLE_PROT_UX; + if (prot & KVM_PGTABLE_PROT_X) { if (prot & KVM_PGTABLE_PROT_W) return -EINVAL; @@ -357,8 +349,16 @@ static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) if (system_supports_bti_kernel()) attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP; + } + + if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + if (!(prot & KVM_PGTABLE_PROT_PX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_PXN; + if (!(prot & KVM_PGTABLE_PROT_UX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_UXN; } else { - attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; + if (!(prot & KVM_PGTABLE_PROT_PX)) + attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; } attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); @@ -379,8 +379,15 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) if (!kvm_pte_valid(pte)) return prot; - if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) - prot |= KVM_PGTABLE_PROT_X; + if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_PXN)) + prot |= KVM_PGTABLE_PROT_PX; + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_UXN)) + prot |= KVM_PGTABLE_PROT_UX; + } else { + if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) + prot |= KVM_PGTABLE_PROT_PX; + } ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) @@ -478,18 +485,18 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); + __tlbi_level(vae2is, ctx->addr, TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return -EINVAL; kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vale2is, ctx->addr, ctx->level); *unmapped += granule; } - dsb(ish); + __tlbi_sync_s1ish_hyp(); isb(); mm_ops->put_page(ctx->ptep); @@ -569,7 +576,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct stage2_map_data { const u64 phys; kvm_pte_t attr; - u8 owner_id; + kvm_pte_t pte_annot; kvm_pte_t *anchor; kvm_pte_t *childp; @@ -589,8 +596,8 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) u64 vtcr = VTCR_EL2_FLAGS; s8 lvls; - vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; - vtcr |= VTCR_EL2_T0SZ(phys_shift); + vtcr |= FIELD_PREP(VTCR_EL2_PS, kvm_get_parange(mmfr0)); + vtcr |= FIELD_PREP(VTCR_EL2_T0SZ, (UL(64) - phys_shift)); /* * Use a minimum 2 level page table to prevent splitting * host PMD huge pages at stage2. @@ -630,21 +637,11 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) vtcr |= VTCR_EL2_DS; /* Set the vmid bits */ - vtcr |= (get_vmid_bits(mmfr1) == 16) ? - VTCR_EL2_VS_16BIT : - VTCR_EL2_VS_8BIT; + vtcr |= (get_vmid_bits(mmfr1) == 16) ? VTCR_EL2_VS : 0; return vtcr; } -static bool stage2_has_fwb(struct kvm_pgtable *pgt) -{ - if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - return false; - - return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); -} - void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, size_t size) { @@ -665,13 +662,49 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, } } -#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) +#define KVM_S2_MEMATTR(pgt, attr) \ + ({ \ + kvm_pte_t __attr; \ + \ + if ((pgt)->flags & KVM_PGTABLE_S2_AS_S1) \ + __attr = PAGE_S2_MEMATTR(AS_S1); \ + else \ + __attr = PAGE_S2_MEMATTR(attr); \ + \ + __attr; \ + }) + +static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr) +{ + bool px, ux; + u8 xn; + + px = prot & KVM_PGTABLE_PROT_PX; + ux = prot & KVM_PGTABLE_PROT_UX; + + if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux) + return -EINVAL; + + if (px && ux) + xn = 0b00; + else if (!px && ux) + xn = 0b01; + else if (!px && !ux) + xn = 0b10; + else + xn = 0b11; + + *attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN; + *attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn); + return 0; +} static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, kvm_pte_t *ptep) { kvm_pte_t attr; u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; + int r; switch (prot & (KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC)) { @@ -691,8 +724,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p attr = KVM_S2_MEMATTR(pgt, NORMAL); } - if (!(prot & KVM_PGTABLE_PROT_X)) - attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + r = stage2_set_xn_attr(prot, &attr); + if (r) + return r; if (prot & KVM_PGTABLE_PROT_R) attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; @@ -721,8 +755,20 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) prot |= KVM_PGTABLE_PROT_R; if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) prot |= KVM_PGTABLE_PROT_W; - if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) - prot |= KVM_PGTABLE_PROT_X; + + switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) { + case 0b00: + prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX; + break; + case 0b01: + prot |= KVM_PGTABLE_PROT_UX; + break; + case 0b11: + prot |= KVM_PGTABLE_PROT_PX; + break; + default: + break; + } return prot; } @@ -747,7 +793,11 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) static bool stage2_pte_is_locked(kvm_pte_t pte) { - return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); + if (kvm_pte_valid(pte)) + return false; + + return FIELD_GET(KVM_INVALID_PTE_TYPE_MASK, pte) == + KVM_INVALID_PTE_TYPE_LOCKED; } static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) @@ -778,6 +828,7 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu) { struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t locked_pte; if (stage2_pte_is_locked(ctx->old)) { /* @@ -788,7 +839,9 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, return false; } - if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) + locked_pte = FIELD_PREP(KVM_INVALID_PTE_TYPE_MASK, + KVM_INVALID_PTE_TYPE_LOCKED); + if (!stage2_try_set_pte(ctx, locked_pte)) return false; if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) { @@ -835,7 +888,7 @@ static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) * system supporting FWB as the optimization is entirely * pointless when the unmap walker needs to perform CMOs. */ - return system_supports_tlb_range() && stage2_has_fwb(pgt); + return system_supports_tlb_range() && cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, @@ -913,7 +966,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, if (!data->annotation) new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); else - new = kvm_init_invalid_leaf_owner(data->owner_id); + new = data->pte_annot; /* * Skip updating the PTE if we are trying to recreate the exact @@ -1067,16 +1120,18 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, - void *mc, u8 owner_id) +int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, enum kvm_invalid_pte_type type, + kvm_pte_t pte_annot) { int ret; struct stage2_map_data map_data = { .mmu = pgt->mmu, .memcache = mc, - .owner_id = owner_id, .force_pte = true, .annotation = true, + .pte_annot = pte_annot | + FIELD_PREP(KVM_INVALID_PTE_TYPE_MASK, type), }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -1085,7 +1140,10 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .arg = &map_data, }; - if (owner_id > KVM_MAX_OWNER_ID) + if (pte_annot & ~KVM_INVALID_PTE_ANNOT_MASK) + return -EINVAL; + + if (!type || type == KVM_INVALID_PTE_TYPE_LOCKED) return -EINVAL; ret = kvm_pgtable_walk(pgt, addr, size, &walker); @@ -1115,7 +1173,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, if (mm_ops->page_count(childp) != 1) return 0; } else if (stage2_pte_cacheable(pgt, ctx->old)) { - need_flush = !stage2_has_fwb(pgt); + need_flush = !cpus_have_final_cap(ARM64_HAS_STAGE2_FWB); } /* @@ -1229,7 +1287,8 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL, 0); + NULL, NULL, + KVM_PGTABLE_WALK_IGNORE_EAGAIN); } void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr, @@ -1296,9 +1355,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags) { - int ret; + kvm_pte_t xn = 0, set = 0, clr = 0; s8 level; - kvm_pte_t set = 0, clr = 0; + int ret; if (prot & KVM_PTE_LEAF_ATTR_HI_SW) return -EINVAL; @@ -1309,8 +1368,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_W) set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; - if (prot & KVM_PGTABLE_PROT_X) - clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; + ret = stage2_set_xn_attr(prot, &xn); + if (ret) + return ret; + + set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; + clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN; ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags); if (!ret || ret == -EAGAIN) @@ -1341,7 +1404,7 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) .arg = pgt, }; - if (stage2_has_fwb(pgt)) + if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) return 0; return kvm_pgtable_walk(pgt, addr, size, &walker); @@ -1541,37 +1604,80 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; } -static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, - enum kvm_pgtable_walk_flags visit) +static int stage2_free_leaf(const struct kvm_pgtable_visit_ctx *ctx) { struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!stage2_pte_is_counted(ctx->old)) + mm_ops->put_page(ctx->ptep); + return 0; +} + +static int stage2_free_table_post(const struct kvm_pgtable_visit_ctx *ctx) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); + + if (mm_ops->page_count(childp) != 1) return 0; + /* + * Drop references and clear the now stale PTE to avoid rewalking the + * freed page table. + */ mm_ops->put_page(ctx->ptep); + mm_ops->put_page(childp); + kvm_clear_pte(ctx->ptep); + return 0; +} - if (kvm_pte_table(ctx->old, ctx->level)) - mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) +{ + if (!stage2_pte_is_counted(ctx->old)) + return 0; - return 0; + switch (visit) { + case KVM_PGTABLE_WALK_LEAF: + return stage2_free_leaf(ctx); + case KVM_PGTABLE_WALK_TABLE_POST: + return stage2_free_table_post(ctx); + default: + return -EINVAL; + } } -void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt, + u64 addr, u64 size) { - size_t pgd_sz; struct kvm_pgtable_walker walker = { .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); + WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker)); +} + +void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt) +{ + size_t pgd_sz; + pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); + + /* + * Since the pgtable is unlinked at this point, and not shared with + * other walkers, safely deference pgd with kvm_dereference_pteref_raw() + */ + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz); pgt->pgd = NULL; } +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) +{ + kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits)); + kvm_pgtable_stage2_destroy_pgd(pgt); +} + void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) { kvm_pteref_t ptep = (kvm_pteref_t)pgtable; |
