summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMarcelo Tosatti <mtosatt@redhat.com>2015-03-16 20:08:56 -0300
committerMarcelo Tosatti <mtosatti@redhat.com>2015-03-16 20:08:56 -0300
commitf710a12d73dfa1c3a5d2417f2482b970f03bb850 (patch)
tree5e3be52f50fe01bb05ab4371a901cfc6e65108f7 /arch
parent670125bda1d86edfadf81dc56a87582ac7fbd47b (diff)
parentae705930fca6322600690df9dc1c7d0516145a93 (diff)
downloadlwn-f710a12d73dfa1c3a5d2417f2482b970f03bb850.tar.gz
lwn-f710a12d73dfa1c3a5d2417f2482b970f03bb850.zip
Merge tag 'kvm-arm-fixes-4.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
Fixes for KVM/ARM for 4.0-rc5. Fixes page refcounting issues in our Stage-2 page table management code, fixes a missing unlock in a gicv3 error path, and fixes a race that can cause lost interrupts if signals are pending just prior to entering the guest.
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/kvm_mmu.h13
-rw-r--r--arch/arm/kvm/mmu.c75
-rw-r--r--arch/arm64/include/asm/kvm_arm.h5
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h48
4 files changed, 68 insertions, 73 deletions
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index bf0fe99e8ca9..4cf48c3aca13 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#define kvm_pgd_index(addr) pgd_index(addr)
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
-
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
- return 0;
+ return kvm->arch.pgd;
}
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- return kvm->arch.pgd;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
struct kvm;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..5656d79c5a44 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+ unsigned int size = kvm_get_hwpgd_size();
+
+ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
- int ret;
pgd_t *pgd;
+ void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
+ hwpgd = kvm_alloc_hwpgd();
+ if (!hwpgd)
+ return -ENOMEM;
+
+ /* When the kernel uses more levels of page tables than the
+ * guest, we allocate a fake PGD and pre-populate it to point
+ * to the next-level page table, which will be the real
+ * initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+ * the PMD and the kernel will use folded pud.
+ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+ * pages.
+ */
if (KVM_PREALLOC_LEVEL > 0) {
+ int i;
+
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
+
+ if (!pgd) {
+ kvm_free_hwpgd(hwpgd);
+ return -ENOMEM;
+ }
+
+ /* Plug the HW PGD into the fake one. */
+ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+ if (KVM_PREALLOC_LEVEL == 1)
+ pgd_populate(NULL, pgd + i,
+ (pud_t *)hwpgd + i * PTRS_PER_PUD);
+ else if (KVM_PREALLOC_LEVEL == 2)
+ pud_populate(NULL, pud_offset(pgd, 0) + i,
+ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+ }
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ pgd = (pgd_t *)hwpgd;
}
- if (!pgd)
- return -ENOMEM;
-
- ret = kvm_prealloc_hwpgd(kvm, pgd);
- if (ret)
- goto out_err;
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
-out_err:
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(pgd);
- else
- free_pages((unsigned long)pgd, S2_PGD_ORDER);
- return ret;
}
/**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm);
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
- else
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
kvm->arch.pgd = NULL;
}
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 94674eb7e7bb..54bb4ba97441 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
* Note that when using 4K pages, we concatenate two first level page tables
* together.
*
@@ -138,7 +141,6 @@
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
#else
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..bbfb600fa822 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define KVM_PREALLOC_LEVEL (0)
#endif
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm: The KVM struct pointer for the VM.
- * @pgd: The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
- unsigned int i;
- unsigned long hwpgd;
-
- if (KVM_PREALLOC_LEVEL == 0)
- return 0;
-
- hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
- if (!hwpgd)
- return -ENOMEM;
-
- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
- if (KVM_PREALLOC_LEVEL == 1)
- pgd_populate(NULL, pgd + i,
- (pud_t *)hwpgd + i * PTRS_PER_PUD);
- else if (KVM_PREALLOC_LEVEL == 2)
- pud_populate(NULL, pud_offset(pgd, 0) + i,
- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
- }
-
- return 0;
-}
-
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
return pmd_offset(pud, 0);
}
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- if (KVM_PREALLOC_LEVEL > 0) {
- unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
- free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
- }
+ if (KVM_PREALLOC_LEVEL > 0)
+ return PTRS_PER_S2_PGD * PAGE_SIZE;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
static inline bool kvm_page_empty(void *ptr)