From d1ae8c0057921681ca489bba7efbfacbb60d0f28 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Sat, 4 Oct 2014 23:46:43 +0800 Subject: arm64: dmi: Add SMBIOS/DMI support SMBIOS is important for server hardware vendors. It implements a spec for providing descriptive information about the platform. Things like serial numbers, physical layout of the ports, build configuration data, and the like. Signed-off-by: Yi Li Tested-by: Suravee Suthikulpanit Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/dmi.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 arch/arm64/include/asm/dmi.h (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..69d37d87b159 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,31 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include +#include + +/* + * According to section 2.3.6 of the UEFI spec, the firmware should not + * request a virtual mapping for configuration tables such as SMBIOS. + * This means we have to map them before use. + */ +#define dmi_early_remap(x, l) ioremap_cache(x, l) +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap(x, l) ioremap_cache(x, l) +#define dmi_unmap(x) iounmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif -- cgit v1.2.3 From 5284e1b4bc8ae6fcc1c92c63cf6c876a53292f82 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Fri, 24 Oct 2014 13:22:20 +0100 Subject: arm64: xchg: Implement cmpxchg_double The arm64 architecture has the ability to exclusively load and store a pair of registers from an address (ldxp/stxp). Also the SLUB can take advantage of a cmpxchg_double implementation to avoid taking some locks. This patch provides an implementation of cmpxchg_double for 64-bit pairs, and activates the logic required for the SLUB to use these functions (HAVE_ALIGNED_STRUCT_PAGE and HAVE_CMPXCHG_DOUBLE). Also definitions of this_cpu_cmpxchg_8 and this_cpu_cmpxchg_double_8 are wired up to cmpxchg_local and cmpxchg_double_local (rather than the stock implementations that perform non-atomic operations with interrupts disabled) as they are used by the SLUB. 
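As a rough illustration of the new interface (an editorial sketch, not part of the patch; the structure and function names are hypothetical), a caller can atomically update two adjacent, double-word-aligned 64-bit values like this:

/*
 * Sketch only: the two words must be contiguous and suitably aligned, as
 * enforced by the VM_BUG_ON() in __cmpxchg_double() below.
 */
struct pair {
	unsigned long first;
	unsigned long second;
} __aligned(2 * sizeof(unsigned long));

static bool pair_update(struct pair *p,
			unsigned long old1, unsigned long old2,
			unsigned long new1, unsigned long new2)
{
	/* Non-zero only if both words still held their expected values */
	return cmpxchg_double(&p->first, &p->second, old1, old2, new1, new2);
}

This is the pattern the SLUB applies to the freelist/counters pair in struct page, which is why HAVE_ALIGNED_STRUCT_PAGE is selected alongside HAVE_CMPXCHG_DOUBLE.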
On a Juno platform running on only the A57s I get quite a noticeable performance improvement with 5 runs of hackbench on v3.17: Baseline | With Patch -----------------+----------- Mean 119.2312 | 106.1782 StdDev 0.4919 | 0.4494 (times taken to complete `./hackbench 100 process 1000', in seconds) Signed-off-by: Steve Capper Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/cmpxchg.h | 71 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2c3c2ca6f8bc..1e0e4671dd25 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -34,6 +34,7 @@ config ARM64 select GENERIC_TIME_VSYSCALL select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND + select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB @@ -41,6 +42,7 @@ config ARM64 select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT select HAVE_CC_STACKPROTECTOR + select HAVE_CMPXCHG_DOUBLE select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index ddb9d7830558..89e397befad5 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -19,6 +19,7 @@ #define __ASM_CMPXCHG_H #include +#include #include @@ -152,6 +153,51 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, return oldval; } +#define system_has_cmpxchg_double() 1 + +static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2, + unsigned long old1, unsigned long old2, + unsigned long new1, unsigned long new2, int size) +{ + unsigned long loop, lost; + + switch (size) { + case 8: + VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1); + do { + asm volatile("// __cmpxchg_double8\n" + " ldxp %0, %1, %2\n" + " eor %0, %0, %3\n" + " eor %1, %1, %4\n" + " orr %1, %0, %1\n" + " mov %w0, #0\n" + " cbnz %1, 1f\n" + " stxp %w0, %5, %6, %2\n" + "1:\n" + : "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1) + : "r" (old1), "r"(old2), "r"(new1), "r"(new2)); + } while (loop); + break; + default: + BUILD_BUG(); + } + + return !lost; +} + +static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2, + unsigned long old1, unsigned long old2, + unsigned long new1, unsigned long new2, int size) +{ + int ret; + + smp_mb(); + ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size); + smp_mb(); + + return ret; +} + static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, unsigned long new, int size) { @@ -182,6 +228,31 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) +#define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ +({\ + int __ret;\ + __ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \ + (unsigned long)(o2), (unsigned long)(n1), \ + (unsigned long)(n2), sizeof(*(ptr1)));\ + __ret; \ +}) + +#define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ +({\ + int __ret;\ + __ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \ + (unsigned long)(o2), (unsigned long)(n1), \ + (unsigned long)(n2), sizeof(*(ptr1)));\ + __ret; \ +}) + +#define this_cpu_cmpxchg_8(ptr, o, n) \ + cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ + cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2) + #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) 
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) -- cgit v1.2.3 From 286fb1cc32b11c18da3573a8c8c37a4f9da16e30 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 31 Oct 2014 23:06:47 +0000 Subject: arm64/kvm: Fix assembler compatibility of macros Some of the macros defined in kvm_arm.h are useful in assembly files, but are not compatible with the assembler. Change any C language integer constant definitions using appended U, UL, or ULL to the UL() preprocessor macro. Also, add a preprocessor include of the asm/memory.h file which defines the UL() macro. Fixes build errors like these when using kvm_arm.h in assembly source files: Error: unexpected characters following instruction at operand 3 -- `and x0,x1,#((1U<<25)-1)' Acked-by: Mark Rutland Signed-off-by: Geoff Levand Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7fd3e27e3ccc..8afb863f5a9e 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -18,6 +18,7 @@ #ifndef __ARM64_KVM_ARM_H__ #define __ARM64_KVM_ARM_H__ +#include #include /* Hyp Configuration Register (HCR) bits */ @@ -160,9 +161,9 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT (UL(48)) +#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_TTEE (1 << 16) @@ -185,13 +186,13 @@ /* Exception Syndrome Register (ESR) bits */ #define ESR_EL2_EC_SHIFT (26) -#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT) -#define ESR_EL2_IL (1U << 25) +#define ESR_EL2_EC (UL(0x3f) << ESR_EL2_EC_SHIFT) +#define ESR_EL2_IL (UL(1) << 25) #define ESR_EL2_ISS (ESR_EL2_IL - 1) #define ESR_EL2_ISV_SHIFT (24) -#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT) +#define ESR_EL2_ISV (UL(1) << ESR_EL2_ISV_SHIFT) #define ESR_EL2_SAS_SHIFT (22) -#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT) +#define ESR_EL2_SAS (UL(3) << ESR_EL2_SAS_SHIFT) #define ESR_EL2_SSE (1 << 21) #define ESR_EL2_SRT_SHIFT (16) #define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) @@ -205,16 +206,16 @@ #define ESR_EL2_FSC_TYPE (0x3c) #define ESR_EL2_CV_SHIFT (24) -#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT) +#define ESR_EL2_CV (UL(1) << ESR_EL2_CV_SHIFT) #define ESR_EL2_COND_SHIFT (20) -#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT) +#define ESR_EL2_COND (UL(0xf) << ESR_EL2_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ -#define HPFAR_MASK (~0xFUL) +#define HPFAR_MASK (~UL(0xf)) #define ESR_EL2_EC_UNKNOWN (0x00) #define ESR_EL2_EC_WFI (0x01) -- cgit v1.2.3 From fb7332a9fedfd62b1ba6530c86f39f0fa38afd49 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Oct 2014 10:03:09 +0000 Subject: mmu_gather: move minimal range calculations into generic code On architectures with hardware broadcasting of TLB invalidation messages , it makes sense to reduce the range of the mmu_gather structure when unmapping page ranges based on the dirty address information passed to tlb_remove_tlb_entry. 
arm64 already does this by directly manipulating the start/end fields of the gather structure, but this confuses the generic code which does not expect these fields to change and can end up calculating invalid, negative ranges when forcing a flush in zap_pte_range. This patch moves the minimal range calculation out of the arm64 code and into the generic implementation, simplifying zap_pte_range in the process (which no longer needs to care about start/end, since they will point to the appropriate ranges already). With the range being tracked by core code, the need_flush flag is dropped in favour of checking that the end of the range has actually been set. Cc: Benjamin Herrenschmidt Cc: Peter Zijlstra Cc: Russell King - ARM Linux Cc: Michal Simek Acked-by: Linus Torvalds Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlb.h | 67 ++------------------------------------ arch/microblaze/include/asm/tlb.h | 3 +- arch/powerpc/include/asm/pgalloc.h | 3 +- arch/powerpc/include/asm/tlb.h | 1 + arch/powerpc/mm/hugetlbpage.c | 2 -- include/asm-generic/tlb.h | 57 ++++++++++++++++++++++++++------ mm/memory.c | 30 +++++------------ 7 files changed, 63 insertions(+), 100 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index a82c0c5c8b52..c028fe37456f 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -19,10 +19,6 @@ #ifndef __ASM_TLB_H #define __ASM_TLB_H -#define __tlb_remove_pmd_tlb_entry __tlb_remove_pmd_tlb_entry - -#include - #include #include @@ -37,71 +33,22 @@ static inline void __tlb_remove_table(void *_table) #define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ -/* - * There's three ways the TLB shootdown code is used: - * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). - * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. - * 2. Unmapping all vmas. See exit_mmap(). - * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. - * Page tables will be freed. - * 3. Unmapping argument pages. See shift_arg_pages(). - * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. - */ +#include + static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm) { flush_tlb_mm(tlb->mm); - } else if (tlb->end > 0) { + } else { struct vm_area_struct vma = { .vm_mm = tlb->mm, }; flush_tlb_range(&vma, tlb->start, tlb->end); - tlb->start = TASK_SIZE; - tlb->end = 0; - } -} - -static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) -{ - if (!tlb->fullmm) { - tlb->start = min(tlb->start, addr); - tlb->end = max(tlb->end, addr + PAGE_SIZE); - } -} - -/* - * Memorize the range for the TLB flush. - */ -static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, - unsigned long addr) -{ - tlb_add_flush(tlb, addr); -} - -/* - * In the case of tlb vma handling, we can optimise these away in the - * case where we're doing a full MM flush. When we're doing a munmap, - * the vmas are adjusted to only cover the region to be torn down. 
- */ -static inline void tlb_start_vma(struct mmu_gather *tlb, - struct vm_area_struct *vma) -{ - if (!tlb->fullmm) { - tlb->start = TASK_SIZE; - tlb->end = 0; } } -static inline void tlb_end_vma(struct mmu_gather *tlb, - struct vm_area_struct *vma) -{ - if (!tlb->fullmm) - tlb_flush(tlb); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { pgtable_page_dtor(pte); - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, pte); } @@ -109,7 +56,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, virt_to_page(pmdp)); } #endif @@ -118,15 +64,8 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_add_flush(tlb, addr); tlb_remove_entry(tlb, virt_to_page(pudp)); } #endif -static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, - unsigned long address) -{ - tlb_add_flush(tlb, address); -} - #endif diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h index 8aa97817cc8c..99b6ded54849 100644 --- a/arch/microblaze/include/asm/tlb.h +++ b/arch/microblaze/include/asm/tlb.h @@ -14,7 +14,6 @@ #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #include -#include #ifdef CONFIG_MMU #define tlb_start_vma(tlb, vma) do { } while (0) @@ -22,4 +21,6 @@ #define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) #endif +#include + #endif /* _ASM_MICROBLAZE_TLB_H */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e9a9f60e596d..fc3ee06eab87 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ #include -#include #ifdef CONFIG_PPC_BOOK3E extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address); @@ -14,6 +13,8 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +extern void tlb_remove_table(struct mmu_gather *tlb, void *table); + #ifdef CONFIG_PPC64 #include #else diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index e2b428b0f7ba..20733fa518ae 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -27,6 +27,7 @@ #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) +#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry extern void tlb_flush(struct mmu_gather *tlb); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7e70ae968e5f..6a4a5fcb9730 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -517,8 +517,6 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif for (i = 0; i < num_hugepd; i++, hpdp++) hpdp->pd = 0; - tlb->need_flush = 1; - #ifdef CONFIG_PPC_FSL_BOOK3E hugepd_free(tlb, hugepte); #else diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 5672d7ea1fa0..08848050922e 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -96,10 +96,9 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - unsigned int need_flush : 1, /* Did free PTEs */ /* we are in the middle of an operation to clear * a full mm and can make some optimizations */ - fullmm : 1, + unsigned int fullmm : 1, /* we have performed an operation which * 
requires a complete flush of the tlb */ need_flush_all : 1; @@ -128,16 +127,54 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) tlb_flush_mmu(tlb); } +static inline void __tlb_adjust_range(struct mmu_gather *tlb, + unsigned long address) +{ + tlb->start = min(tlb->start, address); + tlb->end = max(tlb->end, address + PAGE_SIZE); +} + +static inline void __tlb_reset_range(struct mmu_gather *tlb) +{ + tlb->start = TASK_SIZE; + tlb->end = 0; +} + +/* + * In the case of tlb vma handling, we can optimise these away in the + * case where we're doing a full MM flush. When we're doing a munmap, + * the vmas are adjusted to only cover the region to be torn down. + */ +#ifndef tlb_start_vma +#define tlb_start_vma(tlb, vma) do { } while (0) +#endif + +#define __tlb_end_vma(tlb, vma) \ + do { \ + if (!tlb->fullmm && tlb->end) { \ + tlb_flush(tlb); \ + __tlb_reset_range(tlb); \ + } \ + } while (0) + +#ifndef tlb_end_vma +#define tlb_end_vma __tlb_end_vma +#endif + +#ifndef __tlb_remove_tlb_entry +#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +#endif + /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * - * Record the fact that pte's were really umapped in ->need_flush, so we can - * later optimise away the tlb invalidate. This helps when userspace is - * unmapping already-unmapped pages, which happens quite a lot. + * Record the fact that pte's were really unmapped by updating the range, + * so we can later optimise away the tlb invalidate. This helps when + * userspace is unmapping already-unmapped pages, which happens quite a lot. */ #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) @@ -151,27 +188,27 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) #define pte_free_tlb(tlb, ptep, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #ifndef __ARCH_HAS_4LEVEL_HACK #define pud_free_tlb(tlb, pudp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - tlb->need_flush = 1; \ + __tlb_adjust_range(tlb, address); \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) diff --git a/mm/memory.c b/mm/memory.c index 1cc6bfbd872e..c71edae9ba44 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -220,9 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long /* Is it from 0 to ~0? 
*/ tlb->fullmm = !(start | (end+1)); tlb->need_flush_all = 0; - tlb->start = start; - tlb->end = end; - tlb->need_flush = 0; tlb->local.next = NULL; tlb->local.nr = 0; tlb->local.max = ARRAY_SIZE(tlb->__pages); @@ -232,15 +229,20 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb->batch = NULL; #endif + + __tlb_reset_range(tlb); } static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { - tlb->need_flush = 0; + if (!tlb->end) + return; + tlb_flush(tlb); #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif + __tlb_reset_range(tlb); } static void tlb_flush_mmu_free(struct mmu_gather *tlb) @@ -256,8 +258,6 @@ static void tlb_flush_mmu_free(struct mmu_gather *tlb) void tlb_flush_mmu(struct mmu_gather *tlb) { - if (!tlb->need_flush) - return; tlb_flush_mmu_tlbonly(tlb); tlb_flush_mmu_free(tlb); } @@ -292,7 +292,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { struct mmu_gather_batch *batch; - VM_BUG_ON(!tlb->need_flush); + VM_BUG_ON(!tlb->end); batch = tlb->active; batch->pages[batch->nr++] = page; @@ -359,8 +359,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; - tlb->need_flush = 1; - /* * When there's less then two users of this mm there cannot be a * concurrent page-table walk. @@ -1185,20 +1183,8 @@ again: arch_leave_lazy_mmu_mode(); /* Do the actual TLB flush before dropping ptl */ - if (force_flush) { - unsigned long old_end; - - /* - * Flush the TLB just for the previous segment, - * then update the range to be the remaining - * TLB range. - */ - old_end = tlb->end; - tlb->end = addr; + if (force_flush) tlb_flush_mmu_tlbonly(tlb); - tlb->start = addr; - tlb->end = old_end; - } pte_unmap_unlock(start_pte, ptl); /* -- cgit v1.2.3 From 7d57511d2dba03a8046c8b428dd9192a4bfc1e73 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 17 Nov 2014 10:37:40 +0000 Subject: arm64: Add COMPAT_HWCAP_LPAE Commit a469abd0f868 (ARM: elf: add new hwcap for identifying atomic ldrd/strd instructions) introduces HWCAP_ELF for 32-bit ARM applications. As LPAE is always present on arm64, report the corresponding compat HWCAP to user space. 
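For illustration only (an editorial example, not part of the patch), a 32-bit application can test for the bit at run time via the ELF auxiliary vector, e.g. with glibc's getauxval(); the HWCAP_LPAE value below mirrors the COMPAT_HWCAP_LPAE definition added here:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef HWCAP_LPAE
#define HWCAP_LPAE	(1 << 20)	/* same bit as COMPAT_HWCAP_LPAE */
#endif

int main(void)
{
	if (getauxval(AT_HWCAP) & HWCAP_LPAE)
		printf("LPAE: atomic 64-bit ldrd/strd available\n");
	return 0;
}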
Signed-off-by: Catalin Marinas Cc: # 3.11+ Signed-off-by: Will Deacon --- arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/kernel/setup.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 024c46183c3c..0ad735166d9f 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -30,6 +30,7 @@ #define COMPAT_HWCAP_IDIVA (1 << 17) #define COMPAT_HWCAP_IDIVT (1 << 18) #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) +#define COMPAT_HWCAP_LPAE (1 << 20) #define COMPAT_HWCAP_EVTSTRM (1 << 21) #define COMPAT_HWCAP2_AES (1 << 0) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 831c97fe1ae9..2e17bd3806c8 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -73,7 +73,8 @@ EXPORT_SYMBOL_GPL(elf_hwcap); COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif -- cgit v1.2.3 From 15670ef1eac9817cf48da12c885aabcdd88e9add Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 19 Nov 2014 17:44:12 +0000 Subject: arm64: pgalloc: consistently use PGALLOC_GFP We currently allocate different levels of page tables with a variety of differing flags, and the PGALLOC_GFP flags, intended for use when allocating any level of page table, are only used for ptes in pte_alloc_one. On x86, PGALLOC_GFP is used for all page table allocations. Currently the major differences are: * __GFP_NOTRACK -- Needed to ensure page tables are always accessible in the presence of kmemcheck to prevent recursive faults. Currently kmemcheck cannot be selected for arm64. * __GFP_REPEAT -- Causes the allocator to try to reclaim pages and retry upon a failure to allocate. * __GFP_ZERO -- Sometimes passed explicitly, sometimes zalloc variants are used. While we've no encountered issues so far, it would be preferable to be consistent. This patch ensures all levels of table are allocated in the same manner, with PGALLOC_GFP. 
Cc: Steve Capper Acked-by: Catalin Marinas Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgalloc.h | 8 ++++---- arch/arm64/mm/pgd.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index d5bed02073d6..e20df38a8ff3 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,11 +26,13 @@ #define check_pgt_cache() do { } while (0) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + #if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pmd_t *)__get_free_page(PGALLOC_GFP); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) @@ -50,7 +52,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); + return (pud_t *)__get_free_page(PGALLOC_GFP); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) @@ -69,8 +71,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) - static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) { diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 6682b361d3ac..71ca104f97bd 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -35,9 +35,9 @@ static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) { if (PGD_SIZE == PAGE_SIZE) - return (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)__get_free_page(PGALLOC_GFP); else - return kmem_cache_zalloc(pgd_cache, GFP_KERNEL); + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) -- cgit v1.2.3 From f97fc810798c261b2790c2a1660461a508a479e0 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Wed, 19 Nov 2014 16:53:43 +0000 Subject: arm64: percpu: Implement this_cpu operations The generic this_cpu operations disable interrupts to ensure that the requested operation is protected from pre-emption. For arm64, this is overkill and can hurt throughput and latency. This patch provides arm64 specific implementations for the this_cpu operations. Rather than disable interrupts, we use the exclusive monitor or atomic operations as appropriate. The following operations are implemented: add, add_return, and, or, read, write, xchg. We also wire up a cmpxchg implementation from cmpxchg.h. Testing was performed using the percpu_test module and hackbench on a Juno board running 3.18-rc4. 
Signed-off-by: Steve Capper Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 6 +- arch/arm64/include/asm/percpu.h | 215 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+), 2 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 89e397befad5..cb9593079f29 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -246,8 +246,10 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define this_cpu_cmpxchg_8(ptr, o, n) \ - cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 5279e5733386..09da25bc596f 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -44,6 +44,221 @@ static inline unsigned long __my_cpu_offset(void) #endif /* CONFIG_SMP */ +#define PERCPU_OP(op, asm_op) \ +static inline unsigned long __percpu_##op(void *ptr, \ + unsigned long val, int size) \ +{ \ + unsigned long loop, ret; \ + \ + switch (size) { \ + case 1: \ + do { \ + asm ("//__per_cpu_" #op "_1\n" \ + "ldxrb %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxrb %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 2: \ + do { \ + asm ("//__per_cpu_" #op "_2\n" \ + "ldxrh %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxrh %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 4: \ + do { \ + asm ("//__per_cpu_" #op "_4\n" \ + "ldxr %w[ret], %[ptr]\n" \ + #asm_op " %w[ret], %w[ret], %w[val]\n" \ + "stxr %w[loop], %w[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + case 8: \ + do { \ + asm ("//__per_cpu_" #op "_8\n" \ + "ldxr %[ret], %[ptr]\n" \ + #asm_op " %[ret], %[ret], %[val]\n" \ + "stxr %w[loop], %[ret], %[ptr]\n" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ + } while (loop); \ + break; \ + default: \ + BUILD_BUG(); \ + } \ + \ + return ret; \ +} + +PERCPU_OP(add, add) +PERCPU_OP(and, and) +PERCPU_OP(or, orr) +#undef PERCPU_OP + +static inline unsigned long __percpu_read(void *ptr, int size) +{ + unsigned long ret; + + switch (size) { + case 1: + ret = ACCESS_ONCE(*(u8 *)ptr); + break; + case 2: + ret = ACCESS_ONCE(*(u16 *)ptr); + break; + case 4: + ret = ACCESS_ONCE(*(u32 *)ptr); + break; + case 8: + ret = ACCESS_ONCE(*(u64 *)ptr); + break; + default: + BUILD_BUG(); + } + + return ret; +} + +static inline void __percpu_write(void *ptr, unsigned long val, int size) +{ + switch (size) { + case 1: + ACCESS_ONCE(*(u8 *)ptr) = (u8)val; + break; + case 2: + ACCESS_ONCE(*(u16 *)ptr) = (u16)val; + break; + case 4: + ACCESS_ONCE(*(u32 *)ptr) = 
(u32)val; + break; + case 8: + ACCESS_ONCE(*(u64 *)ptr) = (u64)val; + break; + default: + BUILD_BUG(); + } +} + +static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, + int size) +{ + unsigned long ret, loop; + + switch (size) { + case 1: + do { + asm ("//__percpu_xchg_1\n" + "ldxrb %w[ret], %[ptr]\n" + "stxrb %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u8 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 2: + do { + asm ("//__percpu_xchg_2\n" + "ldxrh %w[ret], %[ptr]\n" + "stxrh %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 4: + do { + asm ("//__percpu_xchg_4\n" + "ldxr %w[ret], %[ptr]\n" + "stxr %w[loop], %w[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + case 8: + do { + asm ("//__percpu_xchg_8\n" + "ldxr %[ret], %[ptr]\n" + "stxr %w[loop], %[val], %[ptr]\n" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); + } while (loop); + break; + default: + BUILD_BUG(); + } + + return ret; +} + +#define _percpu_add(pcp, val) \ + __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) + +#define _percpu_and(pcp, val) \ + __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_or(pcp, val) \ + __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + +#define _percpu_read(pcp) (typeof(pcp)) \ + (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) + +#define _percpu_write(pcp, val) \ + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) + +#define _percpu_xchg(pcp, val) (typeof(pcp)) \ + (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) + +#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val) +#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val) + +#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val) +#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val) + +#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val) +#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val) + +#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val) +#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val) + +#define this_cpu_read_1(pcp) _percpu_read(pcp) +#define this_cpu_read_2(pcp) _percpu_read(pcp) +#define this_cpu_read_4(pcp) _percpu_read(pcp) +#define this_cpu_read_8(pcp) _percpu_read(pcp) + +#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val) +#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val) + +#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val) +#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val) + #include #endif /* __ASM_PERCPU_H */ -- cgit v1.2.3 
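To sketch what the new this_cpu operations buy a caller (an editorial example, not part of the patch; the variable and function names are hypothetical), a hot-path per-cpu counter can now be updated without masking interrupts:

DEFINE_PER_CPU(u64, pkt_count);		/* hypothetical per-cpu counter */

static void account_packet(void)
{
	/*
	 * With this patch this becomes a short ldxr/add/stxr loop rather
	 * than the generic local_irq_save()/restore() around a plain add.
	 */
	this_cpu_add(pkt_count, 1);
}

static u64 read_local_count(void)
{
	return this_cpu_read(pkt_count);
}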
From 9b79f52d1a702dd5b160f9d2ee0199c3122809bb Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:22 +0000 Subject: arm64: Add support for hooks to handle undefined instructions Add support to register hooks for undefined instructions. The handlers will be called when the undefined instruction and the processor state (as contained in pstate) match criteria used at registration. Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 2 ++ arch/arm64/include/asm/traps.h | 16 ++++++++++ arch/arm64/kernel/insn.c | 5 ++++ arch/arm64/kernel/traps.c | 66 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 56a9e63b6c33..1bb043018315 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -354,6 +354,8 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); + +bool aarch32_insn_is_wide(u32 insn); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 10ca8ff93cc2..232e4ba5d314 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -18,6 +18,22 @@ #ifndef __ASM_TRAP_H #define __ASM_TRAP_H +#include + +struct pt_regs; + +struct undef_hook { + struct list_head node; + u32 instr_mask; + u32 instr_val; + u64 pstate_mask; + u64 pstate_val; + int (*fn)(struct pt_regs *regs, u32 instr); +}; + +void register_undef_hook(struct undef_hook *hook); +void unregister_undef_hook(struct undef_hook *hook); + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index e007714ded04..ab00eb58d385 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -959,3 +959,8 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } + +bool aarch32_insn_is_wide(u32 insn) +{ + return insn >= 0xe800; +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index de1b085e7963..0a801e3743d5 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -259,6 +259,69 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, } } +static LIST_HEAD(undef_hook); +static DEFINE_RAW_SPINLOCK(undef_lock); + +void register_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&undef_lock, flags); + list_add(&hook->node, &undef_hook); + raw_spin_unlock_irqrestore(&undef_lock, flags); +} + +void unregister_undef_hook(struct undef_hook *hook) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&undef_lock, flags); + list_del(&hook->node); + raw_spin_unlock_irqrestore(&undef_lock, flags); +} + +static int call_undef_hook(struct pt_regs *regs) +{ + struct undef_hook *hook; + unsigned long flags; + u32 instr; + int (*fn)(struct pt_regs *regs, u32 instr) = NULL; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!user_mode(regs)) + return 1; + + if (compat_thumb_mode(regs)) { + /* 16-bit Thumb instruction */ + if (get_user(instr, (u16 __user *)pc)) + goto exit; + instr = le16_to_cpu(instr); + if 
(aarch32_insn_is_wide(instr)) { + u32 instr2; + + if (get_user(instr2, (u16 __user *)(pc + 2))) + goto exit; + instr2 = le16_to_cpu(instr2); + instr = (instr << 16) | instr2; + } + } else { + /* 32-bit ARM instruction */ + if (get_user(instr, (u32 __user *)pc)) + goto exit; + instr = le32_to_cpu(instr); + } + + raw_spin_lock_irqsave(&undef_lock, flags); + list_for_each_entry(hook, &undef_hook, node) + if ((instr & hook->instr_mask) == hook->instr_val && + (regs->pstate & hook->pstate_mask) == hook->pstate_val) + fn = hook->fn; + + raw_spin_unlock_irqrestore(&undef_lock, flags); +exit: + return fn ? fn(regs, instr) : 1; +} + asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { siginfo_t info; @@ -268,6 +331,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (!aarch32_break_handler(regs)) return; + if (call_undef_hook(regs) == 0) + return; + if (show_unhandled_signals && unhandled_signal(current, SIGILL) && printk_ratelimit()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", -- cgit v1.2.3 From 0be0e44c182c4f13df13903fd1377671d157d7b7 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:23 +0000 Subject: arm64: Add AArch32 instruction set condition code checks Port support for AArch32 instruction condition code checking from arm to arm64. Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/opcodes.h | 1 + arch/arm64/kernel/Makefile | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/opcodes.h (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h new file mode 100644 index 000000000000..4e603ea36ad3 --- /dev/null +++ b/arch/arm64/include/asm/opcodes.h @@ -0,0 +1 @@ +#include <../../arm/include/asm/opcodes.h> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 5bd029b43644..a4d8671d6d11 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -18,7 +18,8 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ cpuinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o + sys_compat.o \ + ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o -- cgit v1.2.3 From bd35a4adc4131c530ec7d90242555eac7b3dbe3f Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:25 +0000 Subject: arm64: Port SWP/SWPB emulation support from arm The SWP instruction was deprecated in the ARMv6 architecture. The ARMv7 multiprocessing extensions mandate that SWP/SWPB instructions are treated as undefined from reset, with the ability to enable them through the System Control Register SW bit. With ARMv8, the option to enable these instructions through System Control Register was dropped as well. To support legacy applications using these instructions, port the emulation of the SWP and SWPB instructions from the arm port to arm64. 
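For context (an editorial illustration, not part of the patch; the helper name is hypothetical), the kind of legacy AArch32 code this targets is an ARMv5-era atomic exchange such as:

/*
 * AArch32 (32-bit) user code; on ARMv8 the SWP encoding is undefined
 * and now traps into the emulation handler added below.
 */
static inline unsigned int legacy_swap(volatile unsigned int *addr,
				       unsigned int newval)
{
	unsigned int oldval;

	asm volatile("swp %0, %1, [%2]"
		     : "=&r" (oldval)
		     : "r" (newval), "r" (addr)
		     : "memory");
	return oldval;
}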
Reviewed-by: Catalin Marinas Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- Documentation/arm64/legacy_instructions.txt | 7 + arch/arm64/Kconfig | 21 +++ arch/arm64/include/asm/insn.h | 6 + arch/arm64/kernel/armv8_deprecated.c | 191 ++++++++++++++++++++++++++++ arch/arm64/kernel/insn.c | 8 ++ 5 files changed, 233 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index 49d4867c0c09..5ab58614b7ed 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -31,3 +31,10 @@ behaviours and the corresponding values of the sysctl nodes - The default mode depends on the status of the instruction in the architecture. Deprecated instructions should default to emulation while obsolete instructions must be undefined by default. + +Supported legacy instructions +----------------------------- +* SWP{B} +Node: /proc/sys/abi/swp +Status: Obsolete +Default: Undef (0) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index aa8f4bea3738..2b6213840ec8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -180,6 +180,27 @@ menuconfig ARMV8_DEPRECATED if ARMV8_DEPRECATED +config SWP_EMULATION + bool "Emulate SWP/SWPB instructions" + help + ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that + they are always undefined. Say Y here to enable software + emulation of these instructions for userspace using LDXR/STXR. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDXR/STXR rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y + endif endmenu diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1bb043018315..3ecc57c47c04 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -356,6 +356,12 @@ int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); bool aarch32_insn_is_wide(u32 insn); + +#define A32_RN_OFFSET 16 +#define A32_RT_OFFSET 12 +#define A32_RT2_OFFSET 0 + +u32 aarch32_insn_extract_reg_num(u32 insn, int offset); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index db3b79da6a48..98865a7868f1 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -8,10 +8,16 @@ #include #include +#include +#include #include #include +#include +#include +#include #include +#include /* * The runtime support for deprecated instruction support can be in one of @@ -203,11 +209,196 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) register_sysctl_table(table); } +/* + * Implement emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive. 
+ * + * Syntax of SWP{B} instruction: SWP{B} , , [] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +/* + * Error-checking SWP macros implemented using ldxr{b}/stxr{b} + */ +#define __user_swpX_asm(data, addr, res, temp, B) \ + __asm__ __volatile__( \ + " mov %w2, %w1\n" \ + "0: ldxr"B" %w1, [%3]\n" \ + "1: stxr"B" %w0, %w2, [%3]\n" \ + " cbz %w0, 2f\n" \ + " mov %w0, %w4\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w5\n" \ + " b 2b\n" \ + " .popsection" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .quad 0b, 3b\n" \ + " .quad 1b, 3b\n" \ + " .popsection" \ + : "=&r" (res), "+r" (data), "=&r" (temp) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "memory") + +#define __user_swp_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "") +#define __user_swpb_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "b") + +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +static void set_segfault(struct pt_regs *regs, unsigned long addr) +{ + siginfo_t info; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + info.si_code = SEGV_MAPERR; + else + info.si_code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_addr = (void *) instruction_pointer(regs); + + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_die("Illegal memory access", regs, &info, 0); +} + +static int emulate_swpX(unsigned int address, unsigned int *data, + unsigned int type) +{ + unsigned int res = 0; + + if ((type != TYPE_SWPB) && (address & 0x3)) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } + + while (1) { + unsigned long temp; + + if (type == TYPE_SWPB) + __user_swpb_asm(*data, address, res, temp); + else + __user_swp_asm(*data, address, res, temp); + + if (likely(res != -EAGAIN) || signal_pending(current)) + break; + + cond_resched(); + } + + return res; +} + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, u32 instr) +{ + u32 destreg, data, type, address = 0; + int rn, rt2, res = 0; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + type = instr & TYPE_SWPB; + + switch (arm_check_condition(instr, regs->pstate)) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + goto ret; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a SWP, undef */ + return -EFAULT; + default: + return -EINVAL; + } + + rn = aarch32_insn_extract_reg_num(instr, A32_RN_OFFSET); + rt2 = aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET); + + address = (u32)regs->user_regs.regs[rn]; + data = (u32)regs->user_regs.regs[rt2]; + destreg = aarch32_insn_extract_reg_num(instr, A32_RT_OFFSET); + + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", + rn, address, destreg, + aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data); + + /* Check access in reasonable access range for both SWP and SWPB */ + if 
(!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n", + address); + goto fault; + } + + res = emulate_swpX(address, &data, type); + if (res == -EFAULT) + goto fault; + else if (res == 0) + regs->user_regs.regs[destreg] = data; + +ret: + pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + regs->pc += 4; + return 0; + +fault: + set_segfault(regs, address); + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb. + */ +static struct undef_hook swp_hooks[] = { + { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = swp_handler + }, + { } +}; + +static struct insn_emulation_ops swp_ops = { + .name = "swp", + .status = INSN_OBSOLETE, + .hooks = swp_hooks, + .set_hw_mode = NULL, +}; + /* * Invoked as late_initcall, since not needed before init spawned. */ static int __init armv8_deprecated_init(void) { + if (IS_ENABLED(CONFIG_SWP_EMULATION)) + register_insn_emulation(&swp_ops); + register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index ab00eb58d385..63122dcd8524 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -964,3 +964,11 @@ bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; } + +/* + * Macros/defines for extracting register numbers from instruction. + */ +u32 aarch32_insn_extract_reg_num(u32 insn, int offset) +{ + return (insn & (0xf << offset)) >> offset; +} -- cgit v1.2.3 From c852f320584600a372646055d8229e063949eee7 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Nov 2014 11:41:26 +0000 Subject: arm64: Emulate CP15 Barrier instructions The CP15 barrier instructions (CP15ISB, CP15DSB and CP15DMB) are deprecated in the ARMv7 architecture, superseded by ISB, DSB and DMB instructions respectively. Some implementations may provide the ability to disable the CP15 barriers by disabling the CP15BEN bit in SCTLR_EL1. If not enabled, the encodings for these instructions become undefined. 
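As an editorial illustration of such legacy code (not part of the patch; the helper name is hypothetical), an older 32-bit binary might issue a data memory barrier through CP15 rather than the dedicated DMB instruction; this is the CP15DMB encoding that the handler below maps to dmb(sy):

/* AArch32 user code; equivalent to a modern "dmb sy" */
static inline void legacy_cp15_dmb(void)
{
	asm volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}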
To support legacy software using these instructions, this patch register hooks to - * emulate CP15 barriers and warn the user about their use * toggle CP15BEN in SCTLR_EL1 Signed-off-by: Punit Agrawal Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- Documentation/arm64/legacy_instructions.txt | 5 ++ arch/arm64/Kconfig | 15 ++++ arch/arm64/include/asm/insn.h | 2 + arch/arm64/kernel/armv8_deprecated.c | 131 ++++++++++++++++++++++++++++ arch/arm64/kernel/insn.c | 13 +++ 5 files changed, 166 insertions(+) (limited to 'arch/arm64/include') diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index 5ab58614b7ed..a3b3da2ec6ed 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -38,3 +38,8 @@ Supported legacy instructions Node: /proc/sys/abi/swp Status: Obsolete Default: Undef (0) + +* CP15 Barriers +Node: /proc/sys/abi/cp15_barrier +Status: Deprecated +Default: Emulate (1) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2b6213840ec8..8e74dfe1e74d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -201,6 +201,21 @@ config SWP_EMULATION If unsure, say Y +config CP15_BARRIER_EMULATION + bool "Emulate CP15 Barrier instructions" + help + The CP15 barrier instructions - CP15ISB, CP15DSB, and + CP15DMB - are deprecated in ARMv8 (and ARMv7). It is + strongly recommended to use the ISB, DSB, and DMB + instructions instead. + + Say Y here to enable software emulation of these + instructions for AArch32 userspace code. When this option is + enabled, CP15 barrier usage is traced which can help + identify software that needs updating. + + If unsure, say Y + endif endmenu diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 3ecc57c47c04..e2ff32a93b5c 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -362,6 +362,8 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT2_OFFSET 0 u32 aarch32_insn_extract_reg_num(u32 insn, int offset); +u32 aarch32_insn_mcr_extract_opc2(u32 insn); +u32 aarch32_insn_mcr_extract_crm(u32 insn); #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 98865a7868f1..401c2e544ede 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -391,6 +392,133 @@ static struct insn_emulation_ops swp_ops = { .set_hw_mode = NULL, }; +static int cp15barrier_handler(struct pt_regs *regs, u32 instr) +{ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + switch (arm_check_condition(instr, regs->pstate)) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + goto ret; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a barrier instruction */ + return -EFAULT; + default: + return -EINVAL; + } + + switch (aarch32_insn_mcr_extract_crm(instr)) { + case 10: + /* + * dmb - mcr p15, 0, Rt, c7, c10, 5 + * dsb - mcr p15, 0, Rt, c7, c10, 4 + */ + if (aarch32_insn_mcr_extract_opc2(instr) == 5) + dmb(sy); + else + dsb(sy); + break; + case 5: + /* + * isb - mcr p15, 0, Rt, c7, c5, 4 + * + * Taking an exception or returning from one acts as an + * instruction barrier. So no explicit barrier needed here. 
+ */ + break; + } + +ret: + pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + regs->pc += 4; + return 0; +} + +#define SCTLR_EL1_CP15BEN (1 << 5) + +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + asm volatile("mrs %0, sctlr_el1" : "=r" (val)); + val &= ~clear; + val |= set; + asm volatile("msr sctlr_el1, %0" : : "r" (val)); +} + +static void enable_cp15_ben(void *info) +{ + config_sctlr_el1(0, SCTLR_EL1_CP15BEN); +} + +static void disable_cp15_ben(void *info) +{ + config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); +} + +static int cpu_hotplug_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + enable_cp15_ben(NULL); + return NOTIFY_DONE; + case CPU_DYING: + case CPU_DYING_FROZEN: + disable_cp15_ben(NULL); + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block cpu_hotplug_notifier = { + .notifier_call = cpu_hotplug_notify, +}; + +static int cp15_barrier_set_hw_mode(bool enable) +{ + if (enable) { + register_cpu_notifier(&cpu_hotplug_notifier); + on_each_cpu(enable_cp15_ben, NULL, true); + } else { + unregister_cpu_notifier(&cpu_hotplug_notifier); + on_each_cpu(disable_cp15_ben, NULL, true); + } + + return true; +} + +static struct undef_hook cp15_barrier_hooks[] = { + { + .instr_mask = 0x0fff0fdf, + .instr_val = 0x0e070f9a, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = cp15barrier_handler, + }, + { + .instr_mask = 0x0fff0fff, + .instr_val = 0x0e070f95, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = cp15barrier_handler, + }, + { } +}; + +static struct insn_emulation_ops cp15_barrier_ops = { + .name = "cp15_barrier", + .status = INSN_DEPRECATED, + .hooks = cp15_barrier_hooks, + .set_hw_mode = cp15_barrier_set_hw_mode, +}; + /* * Invoked as late_initcall, since not needed before init spawned. */ @@ -399,6 +527,9 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_SWP_EMULATION)) register_insn_emulation(&swp_ops); + if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) + register_insn_emulation(&cp15_barrier_ops); + register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 63122dcd8524..819e409029ce 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -972,3 +972,16 @@ u32 aarch32_insn_extract_reg_num(u32 insn, int offset) { return (insn & (0xf << offset)) >> offset; } + +#define OPC2_MASK 0x7 +#define OPC2_OFFSET 5 +u32 aarch32_insn_mcr_extract_opc2(u32 insn) +{ + return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET; +} + +#define CRM_MASK 0xf +u32 aarch32_insn_mcr_extract_crm(u32 insn) +{ + return insn & CRM_MASK; +} -- cgit v1.2.3 From 930da09f5e50dd22fb0a8600388da8677d62d671 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:07 +0000 Subject: arm64: add cpu_capabilities bitmap For taking note if at least one CPU in the system needs a bug workaround or would benefit from a code optimization, we create a new bitmap to hold (artificial) feature bits. Since elf_hwcap is part of the userland ABI, we keep it alone and introduce a new data structure for that (along with some accessors). 
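A minimal sketch of how the new accessors are meant to be used (editorial example; the capability index and MIDR value are hypothetical, and NCAPS is still 0 in this patch, so real users only appear later in the series):

#define ARM64_WORKAROUND_EXAMPLE	0	/* hypothetical capability bit */

static void check_local_cpu_example(void)
{
	/* Called on each CPU so big.LITTLE systems are covered */
	if (read_cpuid_id() == 0x410fd030)	/* hypothetical affected MIDR */
		cpus_set_cap(ARM64_WORKAROUND_EXAMPLE);
}

static bool example_workaround_needed(void)
{
	return cpus_have_cap(ARM64_WORKAROUND_EXAMPLE);
}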
Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 20 ++++++++++++++++++++ arch/arm64/kernel/setup.c | 3 +++ 2 files changed, 23 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index cd4ac0516488..20b2b3d6b702 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,9 +21,29 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) +#define NCAPS 0 + +extern DECLARE_BITMAP(cpu_hwcaps, NCAPS); + static inline bool cpu_have_feature(unsigned int num) { return elf_hwcap & (1UL << num); } +static inline bool cpus_have_cap(unsigned int num) +{ + if (num >= NCAPS) + return false; + return test_bit(num, cpu_hwcaps); +} + +static inline void cpus_set_cap(unsigned int num) +{ + if (num >= NCAPS) + pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", + num, NCAPS); + else + __set_bit(num, cpu_hwcaps); +} + #endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2e17bd3806c8..c8629eb07ba6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,8 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif +DECLARE_BITMAP(cpu_hwcaps, NCAPS); + static const char *cpu_name; phys_addr_t __fdt_pointer __initdata; -- cgit v1.2.3 From e039ee4ee3fcf174736f2cb0a2eed6cb908348a6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:08 +0000 Subject: arm64: add alternative runtime patching With a blatant copy of some x86 bits we introduce the alternative runtime patching "framework" to arm64. This is quite basic for now and we only provide the functions we need at this time. This is connected to the newly introduced feature bits. Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-asm.h | 16 ++++++++ arch/arm64/include/asm/alternative.h | 43 +++++++++++++++++++++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/alternative.c | 64 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/smp.c | 2 + arch/arm64/kernel/vmlinux.lds.S | 11 ++++++ arch/arm64/mm/init.c | 2 + 7 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/alternative-asm.h create mode 100644 arch/arm64/include/asm/alternative.h create mode 100644 arch/arm64/kernel/alternative.c (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h new file mode 100644 index 000000000000..5ee9340459b8 --- /dev/null +++ b/arch/arm64/include/asm/alternative-asm.h @@ -0,0 +1,16 @@ +#ifndef __ASM_ALTERNATIVE_ASM_H +#define __ASM_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len + .word \orig_offset - . + .word \alt_offset - . 
+ .hword \feature + .byte \orig_len + .byte \alt_len +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h new file mode 100644 index 000000000000..f6d206e7f9e9 --- /dev/null +++ b/arch/arm64/include/asm/alternative.h @@ -0,0 +1,43 @@ +#ifndef __ASM_ALTERNATIVE_H +#define __ASM_ALTERNATIVE_H + +#include +#include +#include + +struct alt_instr { + s32 orig_offset; /* offset to original instruction */ + s32 alt_offset; /* offset to replacement instruction */ + u16 cpufeature; /* cpufeature bit set for replacement */ + u8 orig_len; /* size of original instruction(s) */ + u8 alt_len; /* size of new instruction(s), <= orig_len */ +}; + +void apply_alternatives(void); +void free_alternatives_memory(void); + +#define ALTINSTR_ENTRY(feature) \ + " .word 661b - .\n" /* label */ \ + " .word 663f - .\n" /* new instruction */ \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"a\"\n" \ + "663:\n\t" \ + newinstr "\n" \ + "664:\n\t" \ + ".popsection\n\t" \ + ".if ((664b-663b) != (662b-661b))\n\t" \ + " .error \"Alternatives instruction length mismatch\"\n\t"\ + ".endif\n" + +#endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index b36ebd0aacbb..591a65dc5c9b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o + cpuinfo.o alternative.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c new file mode 100644 index 000000000000..1a3badab800a --- /dev/null +++ b/arch/arm64/kernel/alternative.c @@ -0,0 +1,64 @@ +/* + * alternative runtime patching + * inspired by the x86 version + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#define pr_fmt(fmt) "alternatives: " fmt + +#include +#include +#include +#include +#include +#include + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +static int __apply_alternatives(void *dummy) +{ + struct alt_instr *alt; + u8 *origptr, *replptr; + + for (alt = __alt_instructions; alt < __alt_instructions_end; alt++) { + if (!cpus_have_cap(alt->cpufeature)) + continue; + + BUG_ON(alt->alt_len > alt->orig_len); + + pr_info_once("patching kernel code\n"); + + origptr = (u8 *)&alt->orig_offset + alt->orig_offset; + replptr = (u8 *)&alt->alt_offset + alt->alt_offset; + memcpy(origptr, replptr, alt->alt_len); + flush_icache_range((uintptr_t)origptr, + (uintptr_t)(origptr + alt->alt_len)); + } + + return 0; +} + +void apply_alternatives(void) +{ + /* better not try code patching on a live SMP system */ + stop_machine(__apply_alternatives, NULL, NULL); +} + +void free_alternatives_memory(void) +{ + free_reserved_area(__alt_instructions, __alt_instructions_end, + 0, "alternatives"); +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b06d1d90ee8c..0ef87896e4ae 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -309,6 +310,7 @@ void cpu_die(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); + apply_alternatives(); } void __init smp_prepare_boot_cpu(void) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4596f46d0244..3236727be2b9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -116,6 +116,17 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; + . = ALIGN(4); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + .altinstr_replacement : { + *(.altinstr_replacement) + } + + . = ALIGN(PAGE_SIZE); _data = .; _sdata = .; RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 494297c698ca..bac492c12fcc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "mm.h" @@ -325,6 +326,7 @@ void __init mem_init(void) void free_initmem(void) { free_initmem_default(0); + free_alternatives_memory(); } #ifdef CONFIG_BLK_DEV_INITRD -- cgit v1.2.3 From e116a375423393cdb94714e90a96857005d58428 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:09 +0000 Subject: arm64: detect silicon revisions and set cap bits accordingly After each CPU has been started, we iterate through a list of CPU features or bugs to detect CPUs which need (or could benefit from) kernel code patches. For each feature/bug there is a function which checks if that particular CPU is affected. We will later provide some more generic functions for common things like testing for certain MIDR ranges. We do this for every CPU to cover big.LITTLE systems properly as well. If a certain feature/bug has been detected, the capability bit will be set, so that later the call to apply_alternatives() will trigger the actual code patching. 
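A rough, standalone sketch of the table-driven detection described above (the demo_ names are invented and the checker trivially returns true; in the kernel each checker inspects ID registers such as MIDR_EL1, and the loop runs once per CPU):

    #include <stdbool.h>
    #include <stdio.h>

    struct demo_capability {
        const char *desc;
        unsigned int capability;                      /* cap bit to set on a match */
        bool (*is_affected)(const struct demo_capability *);
    };

    static unsigned long demo_caps;                   /* stand-in for cpu_hwcaps */

    static bool always_affected(const struct demo_capability *c)
    {
        (void)c;
        return true;                                  /* real checkers read MIDR etc. */
    }

    static const struct demo_capability demo_table[] = {
        { .desc = "example workaround", .capability = 0, .is_affected = always_affected },
        { /* NULL desc terminates the table */ }
    };

    static void demo_check_local_cpu(void)
    {
        for (int i = 0; demo_table[i].desc; i++) {
            if (!demo_table[i].is_affected(&demo_table[i]))
                continue;
            if (!(demo_caps & (1UL << demo_table[i].capability)))
                printf("enabling workaround for %s\n", demo_table[i].desc);
            demo_caps |= 1UL << demo_table[i].capability;
        }
    }

    int main(void)
    {
        demo_check_local_cpu();                       /* per-CPU in the kernel */
        return 0;
    }
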
Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cpu_errata.c | 59 +++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 3 ++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/cpu_errata.c (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 20b2b3d6b702..744eaf7fab0f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -46,4 +46,6 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } +void check_local_cpu_errata(void); + #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 591a65dc5c9b..eaa77ed7766a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o alternative.o + cpuinfo.o cpu_errata.o alternative.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c new file mode 100644 index 000000000000..9332cf7c0826 --- /dev/null +++ b/arch/arm64/kernel/cpu_errata.c @@ -0,0 +1,59 @@ +/* + * Contains CPU specific errata definitions + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#define pr_fmt(fmt) "alternative: " fmt + +#include +#include +#include +#include + +/* + * Add a struct or another datatype to the union below if you need + * different means to detect an affected CPU. 
+ */ +struct arm64_cpu_capabilities { + const char *desc; + u16 capability; + bool (*is_affected)(struct arm64_cpu_capabilities *); + union { + struct { + u32 midr_model; + u32 midr_range_min, midr_range_max; + }; + }; +}; + +struct arm64_cpu_capabilities arm64_errata[] = { + {} +}; + +void check_local_cpu_errata(void) +{ + struct arm64_cpu_capabilities *cpus = arm64_errata; + int i; + + for (i = 0; cpus[i].desc; i++) { + if (!cpus[i].is_affected(&cpus[i])) + continue; + + if (!cpus_have_cap(cpus[i].capability)) + pr_info("enabling workaround for %s\n", cpus[i].desc); + cpus_set_cap(cpus[i].capability); + } +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 504fdaa8367e..16d6d032ecf1 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); cpuinfo_detect_icache_policy(info); + + check_local_cpu_errata(); } void cpuinfo_store_cpu(void) -- cgit v1.2.3 From 301bcfac42897dbd1b0b3c1be49f24654a1bc49e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:10 +0000 Subject: arm64: add Cortex-A53 cache errata workaround The ARM errata 819472, 826319, 827319 and 824069 define the same workaround for these hardware issues in certain Cortex-A53 parts. Use the new alternatives framework and the CPU MIDR detection to patch "cache clean" into "cache clean and invalidate" instructions if an affected CPU is detected at runtime. Signed-off-by: Andre Przywara [will: add __maybe_unused to squash gcc warning] Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-asm.h | 13 +++++++++++++ arch/arm64/include/asm/cpufeature.h | 8 +++++++- arch/arm64/include/asm/cputype.h | 5 +++++ arch/arm64/kernel/cpu_errata.c | 33 +++++++++++++++++++++++++++++++- arch/arm64/mm/cache.S | 4 +++- 5 files changed, 60 insertions(+), 3 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h index 5ee9340459b8..919a67855b63 100644 --- a/arch/arm64/include/asm/alternative-asm.h +++ b/arch/arm64/include/asm/alternative-asm.h @@ -11,6 +11,19 @@ .byte \alt_len .endm +.macro alternative_insn insn1 insn2 cap +661: \insn1 +662: .pushsection .altinstructions, "a" + altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f + .popsection + .pushsection .altinstr_replacement, "ax" +663: \insn2 +664: .popsection + .if ((664b-663b) != (662b-661b)) + .error "Alternatives instruction length mismatch" + .endif +.endm + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 744eaf7fab0f..92b6ee44669b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,7 +21,11 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) -#define NCAPS 0 +#define ARM64_WORKAROUND_CLEAN_CACHE 0 + +#define NCAPS 1 + +#ifndef __ASSEMBLY__ extern DECLARE_BITMAP(cpu_hwcaps, NCAPS); @@ -48,4 +52,6 @@ static inline void cpus_set_cap(unsigned int num) void check_local_cpu_errata(void); +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 379d0b874328..8adb986a3086 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -57,6 +57,11 @@ 
#define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_CPU_PART(imp, partnum) \ + (((imp) << MIDR_IMPLEMENTOR_SHIFT) | \ + (0xf << MIDR_ARCHITECTURE_SHIFT) | \ + ((partnum) << MIDR_PARTNUM_SHIFT)) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9332cf7c0826..e107ed2d66dc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -23,6 +23,8 @@ #include #include +#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) + /* * Add a struct or another datatype to the union below if you need * different means to detect an affected CPU. @@ -39,8 +41,37 @@ struct arm64_cpu_capabilities { }; }; +#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ + MIDR_ARCHITECTURE_MASK) + +static bool __maybe_unused +is_affected_midr_range(struct arm64_cpu_capabilities *entry) +{ + u32 midr = read_cpuid_id(); + + if ((midr & CPU_MODEL_MASK) != entry->midr_model) + return false; + + midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK; + + return (midr >= entry->midr_range_min && midr <= entry->midr_range_max); +} + +#define MIDR_RANGE(model, min, max) \ + .is_affected = is_affected_midr_range, \ + .midr_model = model, \ + .midr_range_min = min, \ + .midr_range_max = max + struct arm64_cpu_capabilities arm64_errata[] = { - {} + { + /* Cortex-A53 r0p[012] */ + .desc = "ARM errata 826319, 827319, 824069", + .capability = ARM64_WORKAROUND_CLEAN_CACHE, + MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + }, + { + } }; void check_local_cpu_errata(void) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 23663837acff..8eaf18577d71 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "proc-macros.S" @@ -210,7 +212,7 @@ __dma_clean_range: dcache_line_size x2, x3 sub x3, x2, #1 bic x0, x0, x3 -1: dc cvac, x0 // clean D / U line +1: alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE add x0, x0, x2 cmp x0, x1 b.lo 1b -- cgit v1.2.3 From 5afaa1fc1b320cec48affa7e6949f2493f875c12 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 14 Nov 2014 15:54:11 +0000 Subject: arm64: add Cortex-A57 erratum 832075 workaround The ARM erratum 832075 applies to certain revisions of Cortex-A57, one of the workarounds is to change device loads into using load-aquire semantics. This is achieved using the alternatives framework. 
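For orientation, a standalone sketch of the MIDR matching that both errata entries rely on (the MIDR value below is a hypothetical Cortex-A53 r0p2; the field layout follows the ARM ARM and the masks mirror is_affected_midr_range() above):

    #include <stdint.h>
    #include <stdio.h>

    /* MIDR_EL1 layout: Implementer[31:24] Variant[23:20] Architecture[19:16]
     * PartNum[15:4] Revision[3:0]. */
    #define REVISION_MASK 0x0000000fu
    #define VARIANT_MASK  0x00f00000u

    int main(void)
    {
        /* Hypothetical Cortex-A53 r0p2: implementer 0x41, variant 0,
         * architecture 0xf, part 0xd03, revision 2. */
        uint32_t midr = (0x41u << 24) | (0x0u << 20) | (0xfu << 16) |
                        (0xd03u << 4) | 0x2u;

        /* Keep only the variant and revision bits, as the range check does. */
        uint32_t vr = midr & (REVISION_MASK | VARIANT_MASK);

        printf("variant/revision bits: %#x (within [0x00, 0x02] -> affected)\n",
               (unsigned int)vr);
        return 0;
    }
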
Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 5 +++-- arch/arm64/include/asm/io.h | 23 +++++++++++++++++++---- arch/arm64/kernel/cpu_errata.c | 7 +++++++ 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 92b6ee44669b..0362f8020d46 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -21,9 +21,10 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) -#define ARM64_WORKAROUND_CLEAN_CACHE 0 +#define ARM64_WORKAROUND_CLEAN_CACHE 0 +#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define NCAPS 1 +#define NCAPS 2 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 79f1d519221f..75825b63464d 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include @@ -57,28 +59,41 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr) static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; - asm volatile("ldrb %w0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldrb %w0, [%1]", + "ldarb %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; - asm volatile("ldrh %w0, [%1]" : "=r" (val) : "r" (addr)); + + asm volatile(ALTERNATIVE("ldrh %w0, [%1]", + "ldarh %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; - asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldr %w0, [%1]", + "ldar %w0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } static inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; - asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + asm volatile(ALTERNATIVE("ldr %0, [%1]", + "ldar %0, [%1]", + ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) + : "=r" (val) : "r" (addr)); return val; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e107ed2d66dc..30935d2da55a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -24,6 +24,7 @@ #include #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) +#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) /* * Add a struct or another datatype to the union below if you need @@ -71,6 +72,12 @@ struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), }, { + /* Cortex-A57 r0p0 - r1p2 */ + .desc = "ARM erratum 832075", + .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, + MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12), + }, + { } }; -- cgit v1.2.3 From 3eebdbe5fc7d64c7a6ef14cc5b8be518ffd563fa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 25 Nov 2014 13:27:43 +0000 Subject: arm64: sanity checks: add ID_AA64DFR{0,1}_EL1 While we currently expect self-hosted debug support to be identical across CPUs, we don't currently sanity check this. This patch adds logging of the ID_AA64DFR{0,1}_EL1 values and associated sanity checking code. It's not clear to me whether we need to check PMUVer, TraceVer, and DebugVer, as we don't currently rely on these fields at all. 
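As a loose illustration of the CHECK() accumulation pattern these additions follow (standalone sketch; the DEMO_ names and register values are placeholders, and the real macro also logs which field differed):

    #include <stdint.h>
    #include <stdio.h>

    struct demo_cpuinfo {
        uint64_t id_aa64dfr0;
        uint64_t id_aa64dfr1;
    };

    /* Non-zero when the current CPU's copy of a field differs from the boot CPU's. */
    #define DEMO_CHECK(field, boot, cur) ((boot)->field != (cur)->field)

    int main(void)
    {
        struct demo_cpuinfo boot = { .id_aa64dfr0 = 0x1122, .id_aa64dfr1 = 0 };
        struct demo_cpuinfo cur  = boot;          /* identical CPUs: no mismatch */
        int diff = 0;

        diff |= DEMO_CHECK(id_aa64dfr0, &boot, &cur);
        diff |= DEMO_CHECK(id_aa64dfr1, &boot, &cur);

        printf("debug feature mismatch: %d\n", diff);
        return 0;
    }
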
Signed-off-by: Mark Rutland Cc: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpu.h | 2 ++ arch/arm64/kernel/cpuinfo.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 056443086019..ace70682499b 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -30,6 +30,8 @@ struct cpuinfo_arm64 { u32 reg_dczid; u32 reg_midr; + u64 reg_id_aa64dfr0; + u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; u64 reg_id_aa64mmfr0; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 9da33f96b678..57b641747534 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -111,6 +111,15 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) /* If different, timekeeping will be broken (especially with KVM) */ diff |= CHECK(cntfrq, boot, cur, cpu); + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + diff |= CHECK(id_aa64dfr0, boot, cur, cpu); + diff |= CHECK(id_aa64dfr1, boot, cur, cpu); + /* * Even in big.LITTLE, processors should be identical instruction-set * wise. @@ -171,6 +180,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); + info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); -- cgit v1.2.3 From fcff588633e848aa728a4437ef96d437299ba03d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:38 +0000 Subject: arm64: Treat handle_arch_irq as a function pointer handle_arch_irq isn't actually text, it's just a function pointer. It doesn't need to be stored in the text section and doing so causes problesm if we ever want to make the kernel text read only. Declare handle_arch_irq as a proper function pointer stored in the data section. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Acked-by: Ard Biesheuvel Tested-by: Mark Rutland Tested-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/irq.h | 1 - arch/arm64/kernel/entry.S | 6 ++---- arch/arm64/kernel/irq.c | 2 ++ 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index e1f7ecdde11f..1eebf5bb0b58 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,7 +3,6 @@ #include -extern void (*handle_arch_irq)(struct pt_regs *); extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 622a409916f3..99c8d13fc00d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -167,7 +167,8 @@ tsk .req x28 // current thread_info * Interrupt handling. 
*/ .macro irq_handler - ldr x1, handle_arch_irq + adrp x1, handle_arch_irq + ldr x1, [x1, #:lo12:handle_arch_irq] mov x0, sp blr x1 .endm @@ -699,6 +700,3 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) - -ENTRY(handle_arch_irq) - .quad 0 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 071a6ec13bd8..240b75c0e94f 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) return 0; } +void (*handle_arch_irq)(struct pt_regs *) = NULL; + void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { if (handle_arch_irq) -- cgit v1.2.3 From af86e5974d3069bd26ebcf7c046c6e59726acaaa Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 21 Nov 2014 21:50:42 +0000 Subject: arm64: Factor out fixmap initialization from ioremap The fixmap API was originally added for arm64 for early_ioremap purposes. It can be used for other purposes too so move the initialization from ioremap to somewhere more generic. This makes it obvious where the fixmap is being set up and allows for a cleaner implementation of __set_fixmap. Reviewed-by: Kees Cook Acked-by: Mark Rutland Tested-by: Mark Rutland Tested-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 7 +-- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/ioremap.c | 93 ++-------------------------------------- arch/arm64/mm/mmu.c | 94 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 93 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 5f7bfe6df723..db26a2f2b7a7 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -56,10 +56,11 @@ enum fixed_addresses { #define FIXMAP_PAGE_IO __pgprot(PROT_DEVICE_nGnRE) -extern void __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); +void __init early_fixmap_init(void); -#define __set_fixmap __early_set_fixmap +#define __early_set_fixmap __set_fixmap + +extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); #include diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c8629eb07ba6..1de0e8a895ee 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -384,6 +384,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_fixmap_init(); early_ioremap_init(); parse_early_param(); diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 4a07630a6616..cbb99c8f1e04 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; -#endif -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; -#endif - -static inline pud_t * __init early_ioremap_pud(unsigned long addr) -{ - pgd_t *pgd; - - pgd = pgd_offset_k(addr); - BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - - return pud_offset(pgd, addr); -} - -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) -{ - pud_t *pud = early_ioremap_pud(addr); - - BUG_ON(pud_none(*pud) || pud_bad(*pud)); - - return pmd_offset(pud, addr); -} - -static inline pte_t * __init early_ioremap_pte(unsigned long addr) -{ - 
pmd_t *pmd = early_ioremap_pmd(addr); - - BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); - - return pte_offset_kernel(pmd, addr); -} - +/* + * Must be called after early_fixmap_init + */ void __init early_ioremap_init(void) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - - pgd = pgd_offset_k(addr); - pgd_populate(&init_mm, pgd, bm_pud); - pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, bm_pmd); - pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); - - /* - * The boot-ioremap range spans multiple pmds, for which - * we are not prepared: - */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) - != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); - - if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { - WARN_ON(1); - pr_warn("pmd %p != %p\n", - pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))); - pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", - fix_to_virt(FIX_BTMAP_BEGIN)); - pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", - fix_to_virt(FIX_BTMAP_END)); - - pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); - pr_warn("FIX_BTMAP_BEGIN: %d\n", - FIX_BTMAP_BEGIN); - } - early_ioremap_setup(); } - -void __init __early_set_fixmap(enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags) -{ - unsigned long addr = __fix_to_virt(idx); - pte_t *pte; - - if (idx >= __end_of_fixed_addresses) { - BUG(); - return; - } - - pte = early_ioremap_pte(addr); - - if (pgprot_val(flags)) - set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); - else { - pte_clear(&init_mm, addr, pte); - flush_tlb_kernel_range(addr, addr+PAGE_SIZE); - } -} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f4f8b500f74c..6032f3e3056a 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -463,3 +464,96 @@ void vmemmap_free(unsigned long start, unsigned long end) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not preparted: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + 
fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} -- cgit v1.2.3 From dab78b6dcb2bfc90038f35ada826844273dde4d6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 26 Nov 2014 00:14:16 +0000 Subject: arm64: Add FIX_HOLE to permanent fixed addresses Every other architecture with permanent fixed addresses has FIX_HOLE as the first entry. This seems to be designed as a debugging aid but there are a couple of side effects of not having FIX_HOLE: - If the first fixed address is 0, fix_to_virt -> virt_to_fix triggers a BUG_ON for the virtual address being equal to FIXADDR_TOP - fix_to_virt may return a value outside of FIXADDR_START and FIXADDR_TOP which may look like a bug to a developer. Match up with other architectures and make everything clearer by adding FIX_HOLE. Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index db26a2f2b7a7..9ef6eca905ca 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -31,6 +31,7 @@ * */ enum fixed_addresses { + FIX_HOLE, FIX_EARLYCON_MEM_BASE, __end_of_permanent_fixed_addresses, -- cgit v1.2.3 From e185fab7e13087cbd1f7e281d2698f07dbcdd44e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:37 +0000 Subject: arm64: add seccomp syscall for compat task This patch allows compat task to issue seccomp() system call. Reviewed-by: Kees Cook Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index da1f06b535e3..812f19212b23 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -787,7 +787,8 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr) __SYSCALL(__NR_sched_getattr, sys_sched_getattr) #define __NR_renameat2 382 __SYSCALL(__NR_renameat2, sys_renameat2) - /* 383 for seccomp */ +#define __NR_seccomp 383 +__SYSCALL(__NR_seccomp, sys_seccomp) #define __NR_getrandom 384 __SYSCALL(__NR_getrandom, sys_getrandom) #define __NR_memfd_create 385 -- cgit v1.2.3 From cc5e9097c9aad6b186a568c534e26746d6bfa483 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:38 +0000 Subject: arm64: add SIGSYS siginfo for compat task SIGSYS is primarily used in secure computing to notify tracer of syscall events. This patch allows signal handler on compat task to get correct information with SA_SIGINFO specified when this signal is delivered. 
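From the userspace side, an illustrative sketch (not part of the patch) of a compat task's handler registered with SA_SIGINFO reading these fields. The seccomp filter that would actually raise SIGSYS (one returning SECCOMP_RET_TRAP) is omitted to keep the sketch short:

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Handler consuming the SIGSYS fields: calling address, syscall number
     * and audit architecture. printf() is not async-signal-safe; acceptable
     * for a demo that exits immediately, not for production code. */
    static void sigsys_handler(int sig, siginfo_t *info, void *ucontext)
    {
        (void)sig;
        (void)ucontext;
        printf("SIGSYS: syscall %d (arch %#x) at %p\n",
               info->si_syscall, (unsigned int)info->si_arch, info->si_call_addr);
        _exit(1);
    }

    int main(void)
    {
        struct sigaction sa;

        memset(&sa, 0, sizeof(sa));
        sa.sa_sigaction = sigsys_handler;
        sa.sa_flags = SA_SIGINFO;                 /* request extended siginfo */
        sigemptyset(&sa.sa_mask);
        sigaction(SIGSYS, &sa, NULL);

        /* A SECCOMP_RET_TRAP filter would deliver SIGSYS on a denied syscall. */
        return 0;
    }
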
Reviewed-by: Kees Cook Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/include/asm/compat.h | 7 +++++++ arch/arm64/kernel/signal32.c | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 56de5aadede2..3fb053fa6e98 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -205,6 +205,13 @@ typedef struct compat_siginfo { compat_long_t _band; /* POLL_IN, POLL_OUT, POLL_MSG */ int _fd; } _sigpoll; + + /* SIGSYS */ + struct { + compat_uptr_t _call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + compat_uint_t _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; } _sifields; } compat_siginfo_t; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 1b9ad02837cf..5a1ba6e80d4e 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -186,6 +186,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr); break; + case __SI_SYS: + err |= __put_user((compat_uptr_t)(unsigned long) + from->si_call_addr, &to->si_call_addr); + err |= __put_user(from->si_syscall, &to->si_syscall); + err |= __put_user(from->si_arch, &to->si_arch); + break; default: /* this is just in case for now ... */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); -- cgit v1.2.3 From a1ae65b219416a72c15577bd4c8c11174fffbb8b Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 28 Nov 2014 05:26:39 +0000 Subject: arm64: add seccomp support secure_computing() is called first in syscall_trace_enter() so that a system call will be aborted quickly without doing succeeding syscall tracing if seccomp rules want to deny that system call. On compat task, syscall numbers for system calls allowed in seccomp mode 1 are different from those on normal tasks, and so _NR_seccomp_xxx_32's need to be redefined. Signed-off-by: AKASHI Takahiro Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/include/asm/seccomp.h | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/unistd.h | 3 +++ arch/arm64/kernel/ptrace.c | 5 +++++ 4 files changed, 47 insertions(+) create mode 100644 arch/arm64/include/asm/seccomp.h (limited to 'arch/arm64/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f5412d628ff6..7c79c6494379 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -38,6 +38,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_BPF_JIT select HAVE_C_RECORDMCOUNT @@ -455,6 +456,19 @@ config ARCH_HAS_CACHE_LINE_SIZE source "mm/Kconfig" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. 
+ config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h new file mode 100644 index 000000000000..c76fac979629 --- /dev/null +++ b/arch/arm64/include/asm/seccomp.h @@ -0,0 +1,25 @@ +/* + * arch/arm64/include/asm/seccomp.h + * + * Copyright (C) 2014 Linaro Limited + * Author: AKASHI Takahiro + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#ifdef CONFIG_COMPAT +#define __NR_seccomp_read_32 __NR_compat_read +#define __NR_seccomp_write_32 __NR_compat_write +#define __NR_seccomp_exit_32 __NR_compat_exit +#define __NR_seccomp_sigreturn_32 __NR_compat_rt_sigreturn +#endif /* CONFIG_COMPAT */ + +#include + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 6d2bf419431d..49c9aefd24a5 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -31,6 +31,9 @@ * Compat syscall numbers used by the AArch64 kernel. */ #define __NR_compat_restart_syscall 0 +#define __NR_compat_exit 1 +#define __NR_compat_read 3 +#define __NR_compat_write 4 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f576781d8d3b..d882b833dbdb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1149,6 +1150,10 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { + /* Do the secure computing check first; failures should be fast. 
*/ + if (secure_computing() == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); -- cgit v1.2.3 From a2d25a5391ca219f196f9fee7b535c40d201c6bf Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 1 Dec 2014 10:53:08 +0000 Subject: arm64: compat: align cacheflush syscall with arch/arm Update handling of cacheflush syscall with changes made in arch/arm counterpart: - return error to userspace when flushing syscall fails - split user cache-flushing into interruptible chunks - don't bother rounding to nearest vma Signed-off-by: Vladimir Murzin [will: changed internal return value from -EINTR to 0 to match arch/arm/] Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/sys_compat.c | 49 ++++++++++++++++++++++--------------- arch/arm64/mm/cache.S | 6 ++++- 3 files changed, 35 insertions(+), 22 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 689b6379188c..7ae31a2cc6c0 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -73,7 +73,7 @@ extern void flush_cache_all(void); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); -extern void __flush_cache_user_range(unsigned long start, unsigned long end); +extern long __flush_cache_user_range(unsigned long start, unsigned long end); static inline void flush_cache_mm(struct mm_struct *mm) { diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index dc47e53e9e28..28c511b06edf 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -28,29 +28,39 @@ #include #include -static inline void -do_compat_cache_op(unsigned long start, unsigned long end, int flags) +static long +__do_compat_cache_op(unsigned long start, unsigned long end) { - struct mm_struct *mm = current->active_mm; - struct vm_area_struct *vma; + long ret; - if (end < start || flags) - return; + do { + unsigned long chunk = min(PAGE_SIZE, end - start); - down_read(&mm->mmap_sem); - vma = find_vma(mm, start); - if (vma && vma->vm_start < end) { - if (start < vma->vm_start) - start = vma->vm_start; - if (end > vma->vm_end) - end = vma->vm_end; - up_read(&mm->mmap_sem); - __flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end)); - return; - } - up_read(&mm->mmap_sem); + if (fatal_signal_pending(current)) + return 0; + + ret = __flush_cache_user_range(start, start + chunk); + if (ret) + return ret; + + cond_resched(); + start += chunk; + } while (start < end); + + return 0; } +static inline long +do_compat_cache_op(unsigned long start, unsigned long end, int flags) +{ + if (end < start || flags) + return -EINVAL; + + if (!access_ok(VERIFY_READ, start, end - start)) + return -EFAULT; + + return __do_compat_cache_op(start, end); +} /* * Handle all unrecognised system calls. */ @@ -74,8 +84,7 @@ long compat_arm_syscall(struct pt_regs *regs) * the specified region). 
*/ case __ARM_NR_compat_cacheflush: - do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); - return 0; + return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]); case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 8eaf18577d71..2560e1e1562e 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -140,9 +141,12 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU add x4, x4, x2 cmp x4, x1 b.lo 1b -9: // ignore any faulting cache operation dsb ish isb + mov x0, #0 + ret +9: + mov x0, #-EFAULT ret ENDPROC(flush_icache_range) ENDPROC(__flush_cache_user_range) -- cgit v1.2.3 From e4f88d833bec29b8e6fadc1b2488f0c6370935e1 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Tue, 2 Dec 2014 17:49:24 +0000 Subject: arm64: Implement support for read-mostly sections As putting data which is read mostly together, we can avoid unnecessary cache line bouncing. Other architectures, such as ARM and x86, adopted the same idea. Acked-by: Catalin Marinas Signed-off-by: Jungseok Lee Signed-off-by: Will Deacon --- arch/arm64/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 88cc05b5f3ac..bde449936e2f 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -32,6 +32,8 @@ #ifndef __ASSEMBLY__ +#define __read_mostly __attribute__((__section__(".data..read_mostly"))) + static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); -- cgit v1.2.3 From 06f9eb884be81431d54d7d37390043e3b5b7f14a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 4 Dec 2014 01:17:01 +0000 Subject: arm64: Provide a namespace to NCAPS Building arm64.allmodconfig leads to the following warning: usb/gadget/function/f_ncm.c:203:0: warning: "NCAPS" redefined #define NCAPS (USB_CDC_NCM_NCAP_ETH_FILTER | USB_CDC_NCM_NCAP_CRC_MODE) ^ In file included from /home/build/work/batch/arch/arm64/include/asm/io.h:32:0, from /home/build/work/batch/include/linux/clocksource.h:19, from /home/build/work/batch/include/clocksource/arm_arch_timer.h:19, from /home/build/work/batch/arch/arm64/include/asm/arch_timer.h:27, from /home/build/work/batch/arch/arm64/include/asm/timex.h:19, from /home/build/work/batch/include/linux/timex.h:65, from /home/build/work/batch/include/linux/sched.h:19, from /home/build/work/batch/arch/arm64/include/asm/compat.h:25, from /home/build/work/batch/arch/arm64/include/asm/stat.h:23, from /home/build/work/batch/include/linux/stat.h:5, from /home/build/work/batch/include/linux/module.h:10, from /home/build/work/batch/drivers/usb/gadget/function/f_ncm.c:19: arch/arm64/include/asm/cpufeature.h:27:0: note: this is the location of the previous definition #define NCAPS 2 So add a ARM64 prefix to avoid such problem. 
Reported-by: Olof's autobuilder Signed-off-by: Fabio Estevam Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 10 +++++----- arch/arm64/kernel/setup.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0362f8020d46..07547ccc1f2b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -24,11 +24,11 @@ #define ARM64_WORKAROUND_CLEAN_CACHE 0 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define NCAPS 2 +#define ARM64_NCAPS 2 #ifndef __ASSEMBLY__ -extern DECLARE_BITMAP(cpu_hwcaps, NCAPS); +extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); static inline bool cpu_have_feature(unsigned int num) { @@ -37,16 +37,16 @@ static inline bool cpu_have_feature(unsigned int num) static inline bool cpus_have_cap(unsigned int num) { - if (num >= NCAPS) + if (num >= ARM64_NCAPS) return false; return test_bit(num, cpu_hwcaps); } static inline void cpus_set_cap(unsigned int num) { - if (num >= NCAPS) + if (num >= ARM64_NCAPS) pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n", - num, NCAPS); + num, ARM64_NCAPS); else __set_bit(num, cpu_hwcaps); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 1de0e8a895ee..b80991166754 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -80,7 +80,7 @@ unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif -DECLARE_BITMAP(cpu_hwcaps, NCAPS); +DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); static const char *cpu_name; phys_addr_t __fdt_pointer __initdata; -- cgit v1.2.3 From af2c632e234f7158e891c27cc2270f8843f08855 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Thu, 4 Dec 2014 06:29:35 +0000 Subject: arm64/include/asm: Fixed a warning about 'struct pt_regs' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If I include asm/irq.h on the top of my code, and set ARCH=arm64, I'll get a compile warning, details are below: warning: ‘struct pt_regs’ declared inside parameter list [enabled by default] This patch is suggested by Arnd, see: http://lists.infradead.org/pipermail/linux-arm-kernel/2014-December/308270.html Signed-off-by: Chunyan Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 1eebf5bb0b58..94c53674a31d 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -3,6 +3,8 @@ #include +struct pt_regs; + extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -- cgit v1.2.3 From 932ded4b0b9bf111fbf9d176ec12152a0d29b0fd Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 28 Nov 2014 13:40:45 +0000 Subject: arm64: add module support for alternatives fixups Currently the kernel patches all necessary instructions once at boot time, so modules are not covered by this. Change the apply_alternatives() function to take a beginning and an end pointer and introduce a new variant (apply_alternatives_all()) to cover the existing use case for the static kernel image section. Add a module_finalize() function to arm64 to check for an alternatives section in a module and patch only the instructions from that specific area. 
Since that module code is not touched before the module initialization has ended, we don't need to halt the machine before doing the patching in the module's code. Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 3 ++- arch/arm64/kernel/alternative.c | 29 +++++++++++++++++++++++++---- arch/arm64/kernel/module.c | 18 ++++++++++++++++++ arch/arm64/kernel/smp.c | 2 +- 4 files changed, 46 insertions(+), 6 deletions(-) (limited to 'arch/arm64/include') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index f6d206e7f9e9..d261f01e2bae 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -13,7 +13,8 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; -void apply_alternatives(void); +void apply_alternatives_all(void); +void apply_alternatives(void *start, size_t length); void free_alternatives_memory(void); #define ALTINSTR_ENTRY(feature) \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 1a3badab800a..ad7821d64a1d 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -28,12 +28,18 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -static int __apply_alternatives(void *dummy) +struct alt_region { + struct alt_instr *begin; + struct alt_instr *end; +}; + +static int __apply_alternatives(void *alt_region) { struct alt_instr *alt; + struct alt_region *region = alt_region; u8 *origptr, *replptr; - for (alt = __alt_instructions; alt < __alt_instructions_end; alt++) { + for (alt = region->begin; alt < region->end; alt++) { if (!cpus_have_cap(alt->cpufeature)) continue; @@ -51,10 +57,25 @@ static int __apply_alternatives(void *dummy) return 0; } -void apply_alternatives(void) +void apply_alternatives_all(void) { + struct alt_region region = { + .begin = __alt_instructions, + .end = __alt_instructions_end, + }; + /* better not try code patching on a live SMP system */ - stop_machine(__apply_alternatives, NULL, NULL); + stop_machine(__apply_alternatives, ®ion, NULL); +} + +void apply_alternatives(void *start, size_t length) +{ + struct alt_region region = { + .begin = start, + .end = start + length, + }; + + __apply_alternatives(®ion); } void free_alternatives_memory(void) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 1eb1cc955139..fd027b101de5 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -26,6 +26,7 @@ #include #include #include +#include #define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX #define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 @@ -394,3 +395,20 @@ overflow: me->name, (int)ELF64_R_TYPE(rel[i].r_info), val); return -ENOEXEC; } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { + apply_alternatives((void *)s->sh_addr, s->sh_size); + return 0; + } + } + + return 0; +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 0ef87896e4ae..7ae6ee085261 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -310,7 +310,7 @@ void cpu_die(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); - apply_alternatives(); + 
apply_alternatives_all(); } void __init smp_prepare_boot_cpu(void) -- cgit v1.2.3
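As a closing illustration of the begin/end region idea (standalone sketch with simplified demo_ structures, not the kernel's struct alt_instr), the same walker serves both the static kernel image and a module's .altinstructions section handed over as a base pointer plus byte length:

    #include <stdio.h>

    struct demo_alt { int cpufeature; };

    struct demo_region {
        struct demo_alt *begin;
        struct demo_alt *end;
    };

    static void demo_apply(struct demo_region *r)
    {
        for (struct demo_alt *a = r->begin; a < r->end; a++)
            printf("would patch entry for capability %d\n", a->cpufeature);
    }

    int main(void)
    {
        struct demo_alt module_sect[] = { { 0 }, { 1 } };   /* fake section contents */
        struct demo_region region = {
            .begin = module_sect,
            .end   = (struct demo_alt *)((char *)module_sect + sizeof(module_sect)),
        };

        demo_apply(&region);    /* mirrors apply_alternatives(s->sh_addr, s->sh_size) */
        return 0;
    }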