diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-18 14:32:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-18 14:32:31 -0700 |
commit | 8a5de18239e418fe7b1f36504834689f754d8ccc (patch) | |
tree | 8d05ae77da1d4a8512b6052e2ba23571543666c7 | |
parent | 857b50f5d0eed113428c864e927289d8f5f2b864 (diff) | |
parent | 2df36a5dd6792870bef48f63bfca42055ea5b79c (diff) | |
download | lwn-8a5de18239e418fe7b1f36504834689f754d8ccc.tar.gz lwn-8a5de18239e418fe7b1f36504834689f754d8ccc.zip |
Merge tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
Pull second batch of changes for KVM/{arm,arm64} from Marc Zyngier:
"The most obvious thing is the sizeable MMU changes to support 48bit
VAs on arm64.
Summary:
- support for 48bit IPA and VA (EL2)
- a number of fixes for devices mapped into guests
- yet another VGIC fix for BE
- a fix for CPU hotplug
- a few compile fixes (disabled VGIC, strict mm checks)"
[ I'm pulling directly from Marc at the request of Paolo Bonzini, whose
backpack was stolen at Düsseldorf airport and will do new keys and
rebuild his web of trust. - Linus ]
* tag 'kvm-arm-for-3.18-take-2' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm:
arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
arm: kvm: STRICT_MM_TYPECHECKS fix for user_mem_abort
arm/arm64: KVM: Ensure memslots are within KVM_PHYS_SIZE
arm64: KVM: Implement 48 VA support for KVM EL2 and Stage-2
arm/arm64: KVM: map MMIO regions at creation time
arm64: kvm: define PAGE_S2_DEVICE as read-only by default
ARM: kvm: define PAGE_S2_DEVICE as read-only by default
arm/arm64: KVM: add 'writable' parameter to kvm_phys_addr_ioremap
arm/arm64: KVM: fix potential NULL dereference in user_mem_abort()
arm/arm64: KVM: use __GFP_ZERO not memset() to get zeroed pages
ARM: KVM: fix vgic-disabled build
arm: kvm: fix CPU hotplug
-rw-r--r-- | arch/arm/include/asm/kvm_mmu.h | 31 | ||||
-rw-r--r-- | arch/arm/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/arm/kvm/arm.c | 5 | ||||
-rw-r--r-- | arch/arm/kvm/interrupts_head.S | 7 | ||||
-rw-r--r-- | arch/arm/kvm/mmu.c | 235 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 127 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable.h | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/vgic-v2-switch.S | 12 | ||||
-rw-r--r-- | include/kvm/arm_vgic.h | 12 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2.c | 24 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 21 |
11 files changed, 392 insertions, 86 deletions
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3f688b458143..acb0d5712716 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -37,6 +37,11 @@ */ #define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. + */ +#define KVM_MMU_CACHE_MIN_PAGES 2 + #ifndef __ASSEMBLY__ #include <asm/cacheflush.h> @@ -50,7 +55,7 @@ void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size); + phys_addr_t pa, unsigned long size, bool writable); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -83,6 +88,11 @@ static inline void kvm_clean_pgd(pgd_t *pgd) clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); } +static inline void kvm_clean_pmd(pmd_t *pmd) +{ + clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t)); +} + static inline void kvm_clean_pmd_entry(pmd_t *pmd) { clean_pmd_entry(pmd); @@ -123,10 +133,23 @@ static inline bool kvm_page_empty(void *ptr) } -#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) -#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) -#define kvm_pud_table_empty(pudp) (0) +#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) +#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) +#define kvm_pud_table_empty(kvm, pudp) (0) + +#define KVM_PREALLOC_LEVEL 0 +static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) +{ + return 0; +} + +static inline void kvm_free_hwpgd(struct kvm *kvm) { } + +static inline void *kvm_get_hwpgd(struct kvm *kvm) +{ + return kvm->arch.pgd; +} struct kvm; diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 90aa4583b308..3b30062975b2 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device; #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) -#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR) +#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE) #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 779605122f32..9e193c8a959e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -409,7 +409,7 @@ static void update_vttbr(struct kvm *kvm) kvm_next_vmid++; /* update vttbr to be used with the new vmid */ - pgd_phys = virt_to_phys(kvm->arch.pgd); + pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; kvm->arch.vttbr = pgd_phys | vmid; @@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self, switch (action) { case CPU_STARTING: case CPU_STARTING_FROZEN: - cpu_init_hyp_mode(NULL); + if (__hyp_get_vectors() == hyp_default_vectors) + cpu_init_hyp_mode(NULL); break; } diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 98c8c5b9a87f..14d488388480 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -433,10 +433,17 @@ ARM_BE8(rev r10, r10 ) str r3, [r11, #VGIC_V2_CPU_HCR] str r4, [r11, #VGIC_V2_CPU_VMCR] str r5, [r11, #VGIC_V2_CPU_MISR] +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r6, [r11, #(VGIC_V2_CPU_EISR + 4)] + str r7, [r11, #VGIC_V2_CPU_EISR] + str r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)] + str r9, [r11, #VGIC_V2_CPU_ELRSR] +#else str r6, [r11, #VGIC_V2_CPU_EISR] str r7, [r11, #(VGIC_V2_CPU_EISR + 4)] str r8, [r11, #VGIC_V2_CPU_ELRSR] str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)] +#endif str r10, [r11, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index eea03069161b..57a403a5c22b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; -#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) +#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) #define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x)) @@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd, } } while (pte++, addr += PAGE_SIZE, addr != end); - if (kvm_pte_table_empty(start_pte)) + if (kvm_pte_table_empty(kvm, start_pte)) clear_pmd_entry(kvm, pmd, start_addr); } @@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud, } } while (pmd++, addr = next, addr != end); - if (kvm_pmd_table_empty(start_pmd)) + if (kvm_pmd_table_empty(kvm, start_pmd)) clear_pud_entry(kvm, pud, start_addr); } @@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd, } } while (pud++, addr = next, addr != end); - if (kvm_pud_table_empty(start_pud)) + if (kvm_pud_table_empty(kvm, start_pud)) clear_pgd_entry(kvm, pgd, start_addr); } @@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - free_pages((unsigned long)boot_hyp_pgd, pgd_order); + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } @@ -343,7 +343,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - free_pages((unsigned long)hyp_pgd, pgd_order); + free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; } @@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, return 0; } +static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pud_t *pud; + pmd_t *pmd; + unsigned long addr, next; + int ret; + + addr = start; + do { + pud = pud_offset(pgd, addr); + + if (pud_none_or_clear_bad(pud)) { + pmd = pmd_alloc_one(NULL, addr); + if (!pmd) { + kvm_err("Cannot allocate Hyp pmd\n"); + return -ENOMEM; + } + pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); + } + + next = pud_addr_end(addr, end); + ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); + if (ret) + return ret; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + static int __create_hyp_mappings(pgd_t *pgdp, unsigned long start, unsigned long end, unsigned long pfn, pgprot_t prot) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd; unsigned long addr, next; int err = 0; @@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp, end = PAGE_ALIGN(end); do { pgd = pgdp + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, addr); - if (!pmd) { - kvm_err("Cannot allocate Hyp pmd\n"); + if (pgd_none(*pgd)) { + pud = pud_alloc_one(NULL, addr); + if (!pud) { + kvm_err("Cannot allocate Hyp pud\n"); err = -ENOMEM; goto out; } - pud_populate(NULL, pud, pmd); - get_page(virt_to_page(pud)); - kvm_flush_dcache_to_poc(pud, sizeof(*pud)); + pgd_populate(NULL, pgd, pud); + get_page(virt_to_page(pgd)); + kvm_flush_dcache_to_poc(pgd, sizeof(*pgd)); } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); + err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot); if (err) goto out; pfn += (next - addr) >> PAGE_SHIFT; @@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { + int ret; pgd_t *pgd; if (kvm->arch.pgd != NULL) { @@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return -EINVAL; } - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER); + if (KVM_PREALLOC_LEVEL > 0) { + /* + * Allocate fake pgd for the page table manipulation macros to + * work. This is not used by the hardware and we have no + * alignment requirement for this allocation. + */ + pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), + GFP_KERNEL | __GFP_ZERO); + } else { + /* + * Allocate actual first-level Stage-2 page table used by the + * hardware for Stage-2 page table walks. + */ + pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER); + } + if (!pgd) return -ENOMEM; - memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t)); + ret = kvm_prealloc_hwpgd(kvm, pgd); + if (ret) + goto out_err; + kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; - return 0; +out_err: + if (KVM_PREALLOC_LEVEL > 0) + kfree(pgd); + else + free_pages((unsigned long)pgd, S2_PGD_ORDER); + return ret; } /** @@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm) return; unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER); + kvm_free_hwpgd(kvm); + if (KVM_PREALLOC_LEVEL > 0) + kfree(kvm->arch.pgd); + else + free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER); kvm->arch.pgd = NULL; } -static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, +static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, phys_addr_t addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd; pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); + if (WARN_ON(pgd_none(*pgd))) { + if (!cache) + return NULL; + pud = mmu_memory_cache_alloc(cache); + pgd_populate(NULL, pgd, pud); + get_page(virt_to_page(pgd)); + } + + return pud_offset(pgd, addr); +} + +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = stage2_get_pud(kvm, cache, addr); if (pud_none(*pud)) { if (!cache) return NULL; @@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, pmd_t *pmd; pte_t *pte, old_pte; - /* Create stage-2 page table mapping - Level 1 */ + /* Create stage-2 page table mapping - Levels 0 and 1 */ pmd = stage2_get_pmd(kvm, cache, addr); if (!pmd) { /* @@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, * @size: The size of the mapping */ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size) + phys_addr_t pa, unsigned long size, bool writable) { phys_addr_t addr, end; int ret = 0; @@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); - ret = mmu_topup_memory_cache(&cache, 2, 2); + if (writable) + kvm_set_s2pte_writable(&pte); + + ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); if (ret) goto out; spin_lock(&kvm->mmu_lock); @@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, /* Let's check if we will get back a huge page backed by hugetlbfs */ down_read(¤t->mm->mmap_sem); vma = find_vma_intersection(current->mm, hva, hva + 1); + if (unlikely(!vma)) { + kvm_err("Failed to find VMA for hva 0x%lx\n", hva); + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + if (is_vm_hugetlb_page(vma)) { hugetlb = true; gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; @@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, up_read(¤t->mm->mmap_sem); /* We need minimum second+third level pages */ - ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS); + ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); if (ret) return ret; @@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } coherent_cache_guest_page(vcpu, hva, PAGE_SIZE); ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, - mem_type == PAGE_S2_DEVICE); + pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE)); } @@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } + /* Userspace should not be able to register out-of-bounds IPAs */ + VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; @@ -1072,8 +1162,8 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); - boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); @@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { - gpa_t gpa = old->base_gfn << PAGE_SHIFT; - phys_addr_t size = old->npages << PAGE_SHIFT; - if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) { - spin_lock(&kvm->mmu_lock); - unmap_stage2_range(kvm, gpa, size); - spin_unlock(&kvm->mmu_lock); - } } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, enum kvm_mr_change change) { - return 0; + hva_t hva = mem->userspace_addr; + hva_t reg_end = hva + mem->memory_size; + bool writable = !(mem->flags & KVM_MEM_READONLY); + int ret = 0; + + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE) + return 0; + + /* + * Prevent userspace from creating a memory region outside of the IPA + * space addressable by the KVM guest IPA space. + */ + if (memslot->base_gfn + memslot->npages >= + (KVM_PHYS_SIZE >> PAGE_SHIFT)) + return -EFAULT; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we can map + * any of them right now. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Mapping a read-only VMA is only allowed if the + * memory region is configured as read-only. + */ + if (writable && !(vma->vm_flags & VM_WRITE)) { + ret = -EPERM; + break; + } + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (vma->vm_flags & VM_PFNMAP) { + gpa_t gpa = mem->guest_phys_addr + + (vm_start - mem->userspace_addr); + phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + + vm_start - vma->vm_start; + + ret = kvm_phys_addr_ioremap(kvm, gpa, pa, + vm_end - vm_start, + writable); + if (ret) + break; + } + hva = vm_end; + } while (hva < reg_end); + + if (ret) { + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + spin_unlock(&kvm->mmu_lock); + } + return ret; } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, @@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { + gpa_t gpa = slot->base_gfn << PAGE_SHIFT; + phys_addr_t size = slot->npages << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index a030d163840b..0caf7a59f6a1 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -41,6 +41,18 @@ */ #define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation + * levels in addition to the PGD and potentially the PUD which are + * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2 + * tables use one level of tables less than the kernel. + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define KVM_MMU_CACHE_MIN_PAGES 1 +#else +#define KVM_MMU_CACHE_MIN_PAGES 2 +#endif + #ifdef __ASSEMBLY__ /* @@ -53,6 +65,7 @@ #else +#include <asm/pgalloc.h> #include <asm/cachetype.h> #include <asm/cacheflush.h> @@ -65,10 +78,6 @@ #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) -/* Make sure we get the right size, and thus the right alignment */ -#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) - int create_hyp_mappings(void *from, void *to); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); void free_boot_hyp_pgd(void); @@ -77,7 +86,7 @@ void free_hyp_pgds(void); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size); + phys_addr_t pa, unsigned long size, bool writable); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -93,6 +102,7 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) static inline void kvm_clean_pgd(pgd_t *pgd) {} +static inline void kvm_clean_pmd(pmd_t *pmd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} static inline void kvm_clean_pte_entry(pte_t *pte) {} @@ -111,19 +121,116 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) #define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end) #define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end) +/* + * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address + * the entire IPA input range with a single pgd entry, and we would only need + * one pgd entry. Note that in this case, the pgd is actually not used by + * the MMU for Stage-2 translations, but is merely a fake pgd used as a data + * structure for the kernel pgtable macros to work. + */ +#if PGDIR_SHIFT > KVM_PHYS_SHIFT +#define PTRS_PER_S2_PGD_SHIFT 0 +#else +#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) +#endif +#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) +#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) + +/* + * If we are concatenating first level stage-2 page tables, we would have less + * than or equal to 16 pointers in the fake PGD, because that's what the + * architecture allows. In this case, (4 - CONFIG_ARM64_PGTABLE_LEVELS) + * represents the first level for the host, and we add 1 to go to the next + * level (which uses contatenation) for the stage-2 tables. + */ +#if PTRS_PER_S2_PGD <= 16 +#define KVM_PREALLOC_LEVEL (4 - CONFIG_ARM64_PGTABLE_LEVELS + 1) +#else +#define KVM_PREALLOC_LEVEL (0) +#endif + +/** + * kvm_prealloc_hwpgd - allocate inital table for VTTBR + * @kvm: The KVM struct pointer for the VM. + * @pgd: The kernel pseudo pgd + * + * When the kernel uses more levels of page tables than the guest, we allocate + * a fake PGD and pre-populate it to point to the next-level page table, which + * will be the real initial page table pointed to by the VTTBR. + * + * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and + * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we + * allocate 2 consecutive PUD pages. + */ +static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) +{ + unsigned int i; + unsigned long hwpgd; + + if (KVM_PREALLOC_LEVEL == 0) + return 0; + + hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT); + if (!hwpgd) + return -ENOMEM; + + for (i = 0; i < PTRS_PER_S2_PGD; i++) { + if (KVM_PREALLOC_LEVEL == 1) + pgd_populate(NULL, pgd + i, + (pud_t *)hwpgd + i * PTRS_PER_PUD); + else if (KVM_PREALLOC_LEVEL == 2) + pud_populate(NULL, pud_offset(pgd, 0) + i, + (pmd_t *)hwpgd + i * PTRS_PER_PMD); + } + + return 0; +} + +static inline void *kvm_get_hwpgd(struct kvm *kvm) +{ + pgd_t *pgd = kvm->arch.pgd; + pud_t *pud; + + if (KVM_PREALLOC_LEVEL == 0) + return pgd; + + pud = pud_offset(pgd, 0); + if (KVM_PREALLOC_LEVEL == 1) + return pud; + + BUG_ON(KVM_PREALLOC_LEVEL != 2); + return pmd_offset(pud, 0); +} + +static inline void kvm_free_hwpgd(struct kvm *kvm) +{ + if (KVM_PREALLOC_LEVEL > 0) { + unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm); + free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT); + } +} + static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); return page_count(ptr_page) == 1; } -#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep) -#ifndef CONFIG_ARM64_64K_PAGES -#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp) +#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) + +#ifdef __PAGETABLE_PMD_FOLDED +#define kvm_pmd_table_empty(kvm, pmdp) (0) +#else +#define kvm_pmd_table_empty(kvm, pmdp) \ + (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2)) +#endif + +#ifdef __PAGETABLE_PUD_FOLDED +#define kvm_pud_table_empty(kvm, pudp) (0) #else -#define kvm_pmd_table_empty(pmdp) (0) +#define kvm_pud_table_empty(kvm, pudp) \ + (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1)) #endif -#define kvm_pud_table_empty(pudp) (0) struct kvm; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index cefd3e825612..41a43bf26492 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -79,7 +79,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) -#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDWR | PTE_UXN) +#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S index ae211772f991..f002fe1c3700 100644 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ b/arch/arm64/kvm/vgic-v2-switch.S @@ -67,10 +67,14 @@ CPU_BE( rev w11, w11 ) str w4, [x3, #VGIC_V2_CPU_HCR] str w5, [x3, #VGIC_V2_CPU_VMCR] str w6, [x3, #VGIC_V2_CPU_MISR] - str w7, [x3, #VGIC_V2_CPU_EISR] - str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] - str w9, [x3, #VGIC_V2_CPU_ELRSR] - str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] +CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) +CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) +CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) +CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) +CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) +CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) +CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) +CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) str w11, [x3, #VGIC_V2_CPU_APR] /* Clear GICH_HCR */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2f2aac8448a4..206dcc3b3f7a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -219,8 +219,8 @@ struct vgic_v2_cpu_if { u32 vgic_hcr; u32 vgic_vmcr; u32 vgic_misr; /* Saved only */ - u32 vgic_eisr[2]; /* Saved only */ - u32 vgic_elrsr[2]; /* Saved only */ + u64 vgic_eisr; /* Saved only */ + u64 vgic_elrsr; /* Saved only */ u32 vgic_apr; u32 vgic_lr[VGIC_V2_MAX_LRS]; }; @@ -331,6 +331,14 @@ static inline int kvm_vgic_create(struct kvm *kvm) return 0; } +static inline void kvm_vgic_destroy(struct kvm *kvm) +{ +} + +static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ +} + static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { return 0; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 416baedfc89f..2935405ad22f 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -71,35 +71,17 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc) { if (!(lr_desc.state & LR_STATE_MASK)) - __set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr); + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); } static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) { - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -#endif - return val; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; } static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) { - u64 val; - -#if BITS_PER_LONG == 64 - val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1]; - val <<= 32; - val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0]; -#else - val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -#endif - return val; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; } static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 862967852d5a..3aaca49de325 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -145,6 +145,20 @@ static void vgic_free_bitmap(struct vgic_bitmap *b) b->shared = NULL; } +/* + * Call this function to convert a u64 value to an unsigned long * bitmask + * in a way that works on both 32-bit and 64-bit LE and BE platforms. + * + * Warning: Calling this function may modify *val. + */ +static unsigned long *u64_to_bitmask(u64 *val) +{ +#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 + *val = (*val >> 32) | (*val << 32); +#endif + return (unsigned long *)val; +} + static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) { @@ -1442,7 +1456,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) * active bit. */ u64 eisr = vgic_get_eisr(vcpu); - unsigned long *eisr_ptr = (unsigned long *)&eisr; + unsigned long *eisr_ptr = u64_to_bitmask(&eisr); int lr; for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { @@ -1505,7 +1519,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) level_pending = vgic_process_maintenance(vcpu); elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = (unsigned long *)&elrsr; + elrsr_ptr = u64_to_bitmask(&elrsr); /* Clear mappings for empty LRs */ for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) { @@ -1899,7 +1913,8 @@ int kvm_vgic_init(struct kvm *kvm) } ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, - vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE); + vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE, + true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; |