From 4beba9486abd2f86d125271d6946f7c38ed0fe77 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 22 Apr 2020 15:25:27 +0100
Subject: mm: Add PG_arch_2 page flag

For arm64 MTE support it is necessary to be able to mark pages that
contain user space visible tags that will need to be saved/restored,
e.g. when swapped out.

To support this, add a new arch-specific flag (PG_arch_2). This flag is
only available on 64-bit architectures due to the limited number of
spare page flags on the 32-bit ones.

Signed-off-by: Steven Price <steven.price@arm.com>
[catalin.marinas@arm.com: use CONFIG_64BIT for guarding this new flag]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/page.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/page.c b/fs/proc/page.c
index f909243d4a66..9f1077d94cde 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -217,6 +217,9 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_PRIVATE_2,	PG_private_2);
 	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE,	PG_owner_priv_1);
 	u |= kpf_copy_bit(k, KPF_ARCH,		PG_arch_1);
+#ifdef CONFIG_64BIT
+	u |= kpf_copy_bit(k, KPF_ARCH_2,	PG_arch_2);
+#endif
 
 	return u;
 };
-- 
cgit v1.2.3
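Since this dump is limited to 'fs', the matching KPF_ARCH_2 definition (in the
non-uapi include/linux/kernel-page-flags.h side of this change) is not visible
above. Assuming it is bit 41, the new flag can be observed from user space
through /proc/kpageflags. A minimal sketch, not part of the patch, requiring
root (unprivileged reads of /proc/self/pagemap return zeroed PFNs):

/*
 * Hedged sketch: report KPF_ARCH_2 (assumed to be bit 41; the definition
 * is outside the 'fs' diff above) for one touched anonymous page.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define KPF_ARCH_2	41	/* assumption, not visible in this excerpt */

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *buf = malloc(psz);
	uint64_t pme = 0, kflags = 0;
	int pagemap, kpf;

	buf[0] = 1;	/* touch the page so it has a PFN */

	pagemap = open("/proc/self/pagemap", O_RDONLY);
	if (pagemap < 0)
		return 1;
	pread(pagemap, &pme, sizeof(pme),
	      (uintptr_t)buf / psz * sizeof(pme));
	if (!(pme & (1ULL << 63)))	/* bit 63: page present */
		return 1;

	kpf = open("/proc/kpageflags", O_RDONLY);
	if (kpf < 0)
		return 1;
	pread(kpf, &kflags, sizeof(kflags),
	      (pme & ((1ULL << 55) - 1)) * sizeof(kflags));	/* bits 0-54: PFN */

	printf("KPF_ARCH_2: %llu\n",
	       (unsigned long long)((kflags >> KPF_ARCH_2) & 1));
	return 0;
}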
From 9f3419315f3cdc41a7318e4d50ba18a592b30c8c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 10:00:27 +0000
Subject: arm64: mte: Add PROT_MTE support to mmap() and mprotect()

To enable tagging on a memory range, the user must explicitly opt in via
a new PROT_MTE flag passed to mmap() or mprotect(). Since this is a new
memory type in the AttrIndx field of a pte, simplify the or'ing of these
bits over the protection_map[] attributes by making MT_NORMAL index 0.

There are two conditions for arch_vm_get_page_prot() to return the
MT_NORMAL_TAGGED memory type: (1) the user requested it via PROT_MTE,
registered as VM_MTE in the vm_flags, and (2) the vma supports MTE,
decided during the mmap() call (only) and registered as VM_MTE_ALLOWED.

arch_calc_vm_prot_bits() is responsible for registering the user request
as VM_MTE. The newly introduced arch_calc_vm_flag_bits() sets
VM_MTE_ALLOWED if the mapping is MAP_ANONYMOUS. An MTE-capable
filesystem (RAM-based) may be able to set VM_MTE_ALLOWED during its
mmap() file ops call.

In addition, update VM_DATA_DEFAULT_FLAGS to allow mprotect(PROT_MTE)
on stack or brk area.

The Linux mmap() syscall currently ignores unknown PROT_* flags. In the
presence of MTE, an mmap(PROT_MTE) on a file which does not support MTE
will not report an error and the memory will not be mapped as Normal
Tagged. For consistency, mprotect(PROT_MTE) will not report an error
either if the memory range does not support MTE. Two subsequent patches
in the series will propose tightening of this behaviour.

Co-developed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/memory.h    | 18 ++++++++++------
 arch/arm64/include/asm/mman.h      | 44 +++++++++++++++++++++++++++++++++++---
 arch/arm64/include/asm/page.h      |  2 +-
 arch/arm64/include/asm/pgtable.h   |  7 +++++-
 arch/arm64/include/uapi/asm/mman.h |  1 +
 fs/proc/task_mmu.c                 |  4 ++++
 include/linux/mm.h                 |  8 +++++++
 7 files changed, 72 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 1e0a78266410..e424fc3a68cb 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -126,14 +126,18 @@
 
 /*
  * Memory types available.
+ *
+ * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in
+ *	      the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note
+ *	      that protection_map[] only contains MT_NORMAL attributes.
  */
-#define MT_DEVICE_nGnRnE	0
-#define MT_DEVICE_nGnRE		1
-#define MT_DEVICE_GRE		2
-#define MT_NORMAL_NC		3
-#define MT_NORMAL		4
-#define MT_NORMAL_WT		5
-#define MT_NORMAL_TAGGED	6
+#define MT_NORMAL		0
+#define MT_NORMAL_TAGGED	1
+#define MT_NORMAL_NC		2
+#define MT_NORMAL_WT		3
+#define MT_DEVICE_nGnRnE	4
+#define MT_DEVICE_nGnRE		5
+#define MT_DEVICE_GRE		6
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 081ec8de9ea6..b01051be7750 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -9,16 +9,51 @@
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 	unsigned long pkey __always_unused)
 {
+	unsigned long ret = 0;
+
 	if (system_supports_bti() && (prot & PROT_BTI))
-		return VM_ARM64_BTI;
+		ret |= VM_ARM64_BTI;
 
-	return 0;
+	if (system_supports_mte() && (prot & PROT_MTE))
+		ret |= VM_MTE;
+
+	return ret;
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
+static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+{
+	/*
+	 * Only allow MTE on anonymous mappings as these are guaranteed to be
+	 * backed by tags-capable memory. The vm_flags may be overridden by a
+	 * filesystem supporting MTE (RAM-based).
+	 */
+	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+		return VM_MTE_ALLOWED;
+
+	return 0;
+}
+#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+
 static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 {
-	return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0);
+	pteval_t prot = 0;
+
+	if (vm_flags & VM_ARM64_BTI)
+		prot |= PTE_GP;
+
+	/*
+	 * There are two conditions required for returning a Normal Tagged
+	 * memory type: (1) the user requested it via PROT_MTE passed to
+	 * mmap() or mprotect() and (2) the corresponding vma supports MTE. We
+	 * register (1) as VM_MTE in the vma->vm_flags and (2) as
+	 * VM_MTE_ALLOWED. Note that the latter can only be set during the
+	 * mmap() call since mprotect() does not accept MAP_* flags.
+	 */
+	if ((vm_flags & VM_MTE) && (vm_flags & VM_MTE_ALLOWED))
+		prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
+
+	return __pgprot(prot);
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
 
@@ -30,6 +65,9 @@ static inline bool arch_validate_prot(unsigned long prot,
 	if (system_supports_bti())
 		supported |= PROT_BTI;
 
+	if (system_supports_mte())
+		supported |= PROT_MTE;
+
 	return (prot & ~supported) == 0;
 }
 #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
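Taken together, the mman.h hooks above mean PROT_MTE only yields tagged memory
on a vma that also carries VM_MTE_ALLOWED. A user-space sketch of opting in,
not part of the patch (PROT_MTE is defined locally as 0x20 per the uapi hunk
further down, since contemporary libc headers lack it):

/*
 * Hedged sketch: request tagged memory on an anonymous mapping. Assumes
 * an MTE-capable kernel/CPU; on kernels without MTE, mmap() silently
 * ignores the unknown PROT_* flag, as noted in the commit message.
 */
#include <stdio.h>
#include <sys/mman.h>

#ifndef PROT_MTE
#define PROT_MTE	0x20	/* from the uapi asm/mman.h hunk below */
#endif

int main(void)
{
	/*
	 * MAP_ANONYMOUS => arch_calc_vm_flag_bits() sets VM_MTE_ALLOWED;
	 * PROT_MTE => arch_calc_vm_prot_bits() sets VM_MTE. Only with both
	 * does arch_vm_get_page_prot() pick MT_NORMAL_TAGGED.
	 */
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_MTE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	p[0] = 42;	/* ordinary (untagged) access still works */

	/*
	 * A file mapping lacks VM_MTE_ALLOWED, so mprotect(PROT_MTE) on it
	 * would succeed without actually tagging the memory (see above).
	 */
	return 0;
}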
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index d918cb1d83a6..012cffc574e8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -43,7 +43,7 @@ extern int pfn_valid(unsigned long);
 
 #endif /* !__ASSEMBLY__ */
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VM_DATA_DEFAULT_FLAGS	(VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
 
 #include <asm-generic/getorder.h>
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0a205a8e91b2..057c40b6f5e0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -681,8 +681,13 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
+	/*
+	 * Normal and Normal-Tagged are two different memory types and indices
+	 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
+	 */
 	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
-			      PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
+			      PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
+			      PTE_ATTRINDX_MASK;
 	/* preserve the hardware dirty information */
 	if (pte_hw_dirty(pte))
 		pte = pte_mkdirty(pte);
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
index 6fdd71eb644f..1e6482a838e1 100644
--- a/arch/arm64/include/uapi/asm/mman.h
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -5,5 +5,6 @@
 #include <asm-generic/mman.h>
 
 #define PROT_BTI	0x10		/* BTI guarded page */
+#define PROT_MTE	0x20		/* Normal Tagged mapping */
 
 #endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5066b0251ed8..35172a91148e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -653,6 +653,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_MERGEABLE)]	= "mg",
 		[ilog2(VM_UFFD_MISSING)]= "um",
 		[ilog2(VM_UFFD_WP)]	= "uw",
+#ifdef CONFIG_ARM64_MTE
+		[ilog2(VM_MTE)]		= "mt",
+		[ilog2(VM_MTE_ALLOWED)]	= "",
+#endif
 #ifdef CONFIG_ARCH_HAS_PKEYS
 		/* These come out via ProtectionKey: */
 		[ilog2(VM_PKEY_BIT0)]	= "",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ca6e6a81576b..4312c6c808e9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -340,6 +340,14 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_MAPPED_COPY	VM_ARCH_1	/* T if mapped copy of data (nommu mmap) */
 #endif
 
+#if defined(CONFIG_ARM64_MTE)
+# define VM_MTE		VM_HIGH_ARCH_0	/* Use Tagged memory for access control */
+# define VM_MTE_ALLOWED	VM_HIGH_ARCH_1	/* Tagged memory permitted */
+#else
+# define VM_MTE		VM_NONE
+# define VM_MTE_ALLOWED	VM_NONE
+#endif
+
 #ifndef VM_GROWSUP
 # define VM_GROWSUP	VM_NONE
 #endif
-- 
cgit v1.2.3
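The "mt" mnemonic added to show_smap_vma_flags() above makes VM_MTE visible in
the VmFlags line of /proc/<pid>/smaps. A rough user-space sketch for checking
it, not part of the patch (naive string matching, relying on the two-letter,
space-separated mnemonic format shown in the hunk):

/*
 * Hedged sketch: scan /proc/self/smaps for a vma carrying VM_MTE. Each
 * VmFlags mnemonic is printed as two letters plus a trailing space, so
 * " mt " is an exact token match on lines that fit the buffer.
 */
#include <stdio.h>
#include <string.h>

static int have_mte_vma(void)
{
	char line[1024];
	FILE *f = fopen("/proc/self/smaps", "r");

	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "VmFlags:", 8) && strstr(line, " mt ")) {
			fclose(f);
			return 1;
		}
	}
	fclose(f);
	return 0;
}

int main(void)
{
	printf("MTE vma present: %d\n", have_mte_vma());
	return 0;
}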
From d563d678aa0be06e7bff2953c986f5ff0355f79c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 1 Jul 2020 17:46:06 +0100
Subject: fs: Handle intra-page faults in copy_mount_options()

The copy_mount_options() function takes a user pointer argument but no
size argument and tries to read up to PAGE_SIZE bytes. However,
copy_from_user() is not guaranteed to return all the accessible bytes
if, for example, the access crosses a page boundary and gets a fault on
the second page. To work around this, the current copy_mount_options()
implementation performs two copy_from_user() passes, first to the end
of the current page and the second to what's left in the subsequent
page.

On arm64 with MTE enabled, access to a user page may trigger a fault
after part of the buffer in a page has been copied (when the user
pointer tag, bits 56-59, no longer matches the allocation tag stored in
memory). Allow copy_mount_options() to handle such intra-page faults by
resorting to a byte-at-a-time copy in case of copy_from_user() failure.

Note that copy_from_user() handles the zeroing of the kernel buffer in
case of error.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index bae0e95b3713..32a0b9146757 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3075,7 +3075,7 @@ static void shrink_submounts(struct mount *mnt)
 void *copy_mount_options(const void __user * data)
 {
 	char *copy;
-	unsigned size;
+	unsigned left, offset;
 
 	if (!data)
 		return NULL;
@@ -3084,16 +3084,27 @@ void *copy_mount_options(const void __user * data)
 	if (!copy)
 		return ERR_PTR(-ENOMEM);
 
-	size = PAGE_SIZE - offset_in_page(data);
+	left = copy_from_user(copy, data, PAGE_SIZE);
 
-	if (copy_from_user(copy, data, size)) {
+	/*
+	 * Not all architectures have an exact copy_from_user(). Resort to
+	 * byte at a time.
+	 */
+	offset = PAGE_SIZE - left;
+	while (left) {
+		char c;
+		if (get_user(c, (const char __user *)data + offset))
+			break;
+		copy[offset] = c;
+		left--;
+		offset++;
+	}
+
+	if (left == PAGE_SIZE) {
 		kfree(copy);
 		return ERR_PTR(-EFAULT);
 	}
-	if (size != PAGE_SIZE) {
-		if (copy_from_user(copy + size, data + size, PAGE_SIZE - size))
-			memset(copy + size, 0, PAGE_SIZE - size);
-	}
+
 	return copy;
 }
-- 
cgit v1.2.3
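The copy loop above generalises beyond mount options: copy in bulk while the
fast path works, then advance byte by byte from the first failure. A
stand-alone user-space sketch of the same pattern, not part of the patch;
bulk_copy() and byte_copy() are hypothetical stand-ins for copy_from_user()
and get_user(), with a simulated fault and the kernel's bytes-not-copied /
nonzero-on-fault conventions:

#include <stdio.h>
#include <string.h>

/* Simulated fault: bytes at or beyond fault_at are "inaccessible". */
static const char *fault_at;

/* Like copy_from_user(): returns bytes NOT copied. Deliberately inexact,
 * giving up on the first 8-byte chunk that would touch the fault. */
static size_t bulk_copy(char *dst, const char *src, size_t n)
{
	size_t done = 0;

	while (done + 8 <= n && src + done + 8 <= fault_at) {
		memcpy(dst + done, src + done, 8);
		done += 8;
	}
	return n - done;
}

/* Like get_user(): nonzero on fault. */
static int byte_copy(char *dst, const char *src)
{
	if (src >= fault_at)
		return -1;
	*dst = *src;
	return 0;
}

/* The copy_mount_options() pattern: returns bytes actually copied. */
static size_t copy_with_fallback(char *dst, const char *src, size_t n)
{
	size_t left = bulk_copy(dst, src, n);
	size_t offset = n - left;	/* resume where the bulk pass stopped */

	while (left) {
		if (byte_copy(dst + offset, src + offset))
			break;	/* genuine fault: keep what we have */
		left--;
		offset++;
	}
	return n - left;
}

int main(void)
{
	char src[32] = "opts=ro,noatime,nodiratime";
	char dst[32] = { 0 };
	size_t got;

	fault_at = src + 12;	/* intra-buffer fault, as with an MTE tag mismatch */
	got = copy_with_fallback(dst, src, sizeof(src));
	/* The byte loop recovers bytes 8-11 that the inexact bulk pass missed. */
	printf("copied %zu bytes: %.*s\n", got, (int)got, dst);
	return 0;
}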