From 21828c99ee91bec94c3d2c32b3d5562ffdea980a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 16 Apr 2018 16:57:15 +0530 Subject: powerpc/kvm: Switch kvm pmd allocator to custom allocator In the next set of patches, we will switch pmd allocator to use page fragments and the locking will be updated to split pmd ptlock. We want to avoid using fragments for partition-scoped table. Use slab cache similar to level 4 table Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index a57eafec4dc2..ccdf3761eec0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -200,6 +200,7 @@ void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, } static struct kmem_cache *kvm_pte_cache; +static struct kmem_cache *kvm_pmd_cache; static pte_t *kvmppc_pte_alloc(void) { @@ -217,6 +218,16 @@ static inline int pmd_is_leaf(pmd_t pmd) return !!(pmd_val(pmd) & _PAGE_PTE); } +static pmd_t *kvmppc_pmd_alloc(void) +{ + return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); +} + +static void kvmppc_pmd_free(pmd_t *pmdp) +{ + kmem_cache_free(kvm_pmd_cache, pmdp); +} + static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, unsigned int level, unsigned long mmu_seq) { @@ -239,7 +250,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (pud && pud_present(*pud) && !pud_huge(*pud)) pmd = pmd_offset(pud, gpa); else if (level <= 1) - new_pmd = pmd_alloc_one(kvm->mm, gpa); + new_pmd = kvmppc_pmd_alloc(); if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd))) new_ptep = kvmppc_pte_alloc(); @@ -382,7 +393,7 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa, if (new_pud) pud_free(kvm->mm, new_pud); if (new_pmd) - 
pmd_free(kvm->mm, new_pmd); + kvmppc_pmd_free(new_pmd); if (new_ptep) kvmppc_pte_free(new_ptep); return ret; @@ -758,7 +769,7 @@ void kvmppc_free_radix(struct kvm *kvm) kvmppc_pte_free(pte); pmd_clear(pmd); } - pmd_free(kvm->mm, pmd_offset(pud, 0)); + kvmppc_pmd_free(pmd_offset(pud, 0)); pud_clear(pud); } pud_free(kvm->mm, pud_offset(pgd, 0)); @@ -770,20 +781,35 @@ void kvmppc_free_radix(struct kvm *kvm) static void pte_ctor(void *addr) { - memset(addr, 0, PTE_TABLE_SIZE); + memset(addr, 0, RADIX_PTE_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, RADIX_PMD_TABLE_SIZE); } int kvmppc_radix_init(void) { - unsigned long size = sizeof(void *) << PTE_INDEX_SIZE; + unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE; kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor); if (!kvm_pte_cache) return -ENOMEM; + + size = sizeof(void *) << RADIX_PMD_INDEX_SIZE; + + kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor); + if (!kvm_pmd_cache) { + kmem_cache_destroy(kvm_pte_cache); + return -ENOMEM; + } + return 0; } void kvmppc_radix_exit(void) { kmem_cache_destroy(kvm_pte_cache); + kmem_cache_destroy(kvm_pmd_cache); } -- cgit v1.2.3 From 0078778a86b14f85bf50e96d9ddeb3b70b55805d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 May 2018 12:20:18 +1000 Subject: powerpc/mm/radix: implement LPID based TLB flushes to be used by KVM Implement a local TLB flush for invalidating an LPID with variants for process or partition scope. And a global TLB flush for invalidating a partition scoped page of an LPID. These will be used by KVM in subsequent patches. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- .../powerpc/include/asm/book3s/64/tlbflush-radix.h | 7 + arch/powerpc/mm/tlb-radix.c | 207 +++++++++++++++++++++ 2 files changed, 214 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 19b45ba6caf9..ef5c3f2994c9 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -51,4 +51,11 @@ extern void radix__flush_tlb_all(void); extern void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, unsigned long address); +extern void radix__flush_tlb_lpid_page(unsigned int lpid, + unsigned long addr, + unsigned long page_size); +extern void radix__flush_pwc_lpid(unsigned int lpid); +extern void radix__local_flush_tlb_lpid(unsigned int lpid); +extern void radix__local_flush_tlb_lpid_guest(unsigned int lpid); + #endif diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index a5d7309c2d05..5ac3206c51cc 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -118,6 +118,53 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbiel_lpid(unsigned long lpid, int set, + unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = PPC_BIT(52); /* IS = 2 */ + rb |= set << PPC_BITLSHIFT(51); + rs = 0; /* LPID comes from LPIDR */ + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(lpid, 1, rb, rs, ric, prs, r); +} + +static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = PPC_BIT(52); /* IS = 2 */ + rs = lpid; + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), 
"i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); +} + +static inline void __tlbiel_lpid_guest(unsigned long lpid, int set, + unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = PPC_BIT(52); /* IS = 2 */ + rb |= set << PPC_BITLSHIFT(51); + rs = 0; /* LPID comes from LPIDR */ + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(lpid, 1, rb, rs, ric, prs, r); +} + + static inline void __tlbiel_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { @@ -150,6 +197,22 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = lpid; + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(lpid, 0, rb, rs, ric, prs, r); +} + static inline void fixup_tlbie(void) { unsigned long pid = 0; @@ -161,6 +224,16 @@ static inline void fixup_tlbie(void) } } +static inline void fixup_tlbie_lpid(unsigned long lpid) +{ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + /* * We use 128 set in radix mode and 256 set in hpt mode. 
*/ @@ -214,6 +287,86 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric) +{ + int set; + + VM_BUG_ON(mfspr(SPRN_LPID) != lpid); + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, + * also flush the entire Page Walk Cache. + */ + __tlbiel_lpid(lpid, 0, ric); + + /* For PWC, only one flush is needed */ + if (ric == RIC_FLUSH_PWC) { + asm volatile("ptesync": : :"memory"); + return; + } + + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB); + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + +static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) +{ + asm volatile("ptesync": : :"memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_lpid + * must be a compile-time constant to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_lpid(lpid, RIC_FLUSH_TLB); + break; + case RIC_FLUSH_PWC: + __tlbie_lpid(lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_lpid(lpid, RIC_FLUSH_ALL); + } + fixup_tlbie_lpid(lpid); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + +static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) +{ + int set; + + VM_BUG_ON(mfspr(SPRN_LPID) != lpid); + + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, + * also flush the entire Page Walk Cache. 
+ */ + __tlbiel_lpid_guest(lpid, 0, ric); + + /* For PWC, only one flush is needed */ + if (ric == RIC_FLUSH_PWC) { + asm volatile("ptesync": : :"memory"); + return; + } + + /* For the remaining sets, just flush the TLB */ + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) + __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); + + asm volatile("ptesync": : :"memory"); +} + + static inline void __tlbiel_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize) @@ -268,6 +421,17 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } +static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, + unsigned long psize, unsigned long ric) +{ + unsigned long ap = mmu_get_ap(psize); + + asm volatile("ptesync": : :"memory"); + __tlbie_lpid_va(va, lpid, ap, ric); + fixup_tlbie_lpid(lpid); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + static inline void _tlbie_va_range(unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, unsigned long psize, bool also_pwc) @@ -534,6 +698,49 @@ static int radix_get_mmu_psize(int page_size) return psize; } +/* + * Flush partition scoped LPID address translation for all CPUs. + */ +void radix__flush_tlb_lpid_page(unsigned int lpid, + unsigned long addr, + unsigned long page_size) +{ + int psize = radix_get_mmu_psize(page_size); + + _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); +} +EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); + +/* + * Flush partition scoped PWC from LPID for all CPUs. 
+ */ +void radix__flush_pwc_lpid(unsigned int lpid) +{ + _tlbie_lpid(lpid, RIC_FLUSH_PWC); +} +EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); + +/* + * Flush partition scoped translations from LPID (=LPIDR) + */ +void radix__local_flush_tlb_lpid(unsigned int lpid) +{ + _tlbiel_lpid(lpid, RIC_FLUSH_ALL); +} +EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid); + +/* + * Flush process scoped translations from LPID (=LPIDR). + * Important difference, the guest normally manages its own translations, + * but some cases e.g., vCPU CPU migration require KVM to flush. + */ +void radix__local_flush_tlb_lpid_guest(unsigned int lpid) +{ + _tlbiel_lpid_guest(lpid, RIC_FLUSH_ALL); +} +EXPORT_SYMBOL_GPL(radix__local_flush_tlb_lpid_guest); + + static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize); -- cgit v1.2.3 From 9f9eae5ce717f497812dfc1bda5219bc589b455d Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 28 Mar 2018 21:58:11 +0200 Subject: powerpc/kvm: Prefer fault_in_pages_readable function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Directly use fault_in_pages_readable instead of manual __get_user code. 
Fix warning treated as error with W=1: arch/powerpc/kernel/kvm.c:675:6: error: variable ‘tmp’ set but not used [-Werror=unused-but-set-variable] Suggested-by: Christophe Leroy Signed-off-by: Mathieu Malaterre Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ad37f827a97..683b5b3805bd 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -672,14 +673,13 @@ static void kvm_use_magic_page(void) { u32 *p; u32 *start, *end; - u32 tmp; u32 features; /* Tell the host to map the magic page to -4096 on all CPUs */ on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } -- cgit v1.2.3