diff options
author | Will Deacon <will@kernel.org> | 2020-10-02 12:16:11 +0100 |
---|---|---|
committer | Will Deacon <will@kernel.org> | 2020-10-02 12:16:11 +0100 |
commit | baab853229ec1f291cec6a70ed61ce93159d0997 (patch) | |
tree | 0da235bb4a50e32d80a5d703eea2a56626113cc1 /arch/arm64/kernel | |
parent | 0a21ac0d3094349de50f42b3712931de208ba74a (diff) | |
parent | b5756146db3ad57a9c0e841ea01ce915db27b7de (diff) | |
download | lwn-baab853229ec1f291cec6a70ed61ce93159d0997.tar.gz lwn-baab853229ec1f291cec6a70ed61ce93159d0997.zip |
Merge branch 'for-next/mte' into for-next/core
Add userspace support for the Memory Tagging Extension introduced by
Armv8.5.
(Catalin Marinas and others)
* for-next/mte: (30 commits)
arm64: mte: Fix typo in memory tagging ABI documentation
arm64: mte: Add Memory Tagging Extension documentation
arm64: mte: Kconfig entry
arm64: mte: Save tags when hibernating
arm64: mte: Enable swap of tagged pages
mm: Add arch hooks for saving/restoring tags
fs: Handle intra-page faults in copy_mount_options()
arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset
arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS support
arm64: mte: Allow {set,get}_tagged_addr_ctrl() on non-current tasks
arm64: mte: Restore the GCR_EL1 register after a suspend
arm64: mte: Allow user control of the generated random tags via prctl()
arm64: mte: Allow user control of the tag check mode via prctl()
mm: Allow arm64 mmap(PROT_MTE) on RAM-based files
arm64: mte: Validate the PROT_MTE request via arch_validate_flags()
mm: Introduce arch_validate_flags()
arm64: mte: Add PROT_MTE support to mmap() and mprotect()
mm: Introduce arch_calc_vm_flag_bits()
arm64: mte: Tags-aware aware memcmp_pages() implementation
arm64: Avoid unnecessary clear_user_page() indirection
...
Diffstat (limited to 'arch/arm64/kernel')
-rw-r--r-- | arch/arm64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/cpufeature.c | 35 | ||||
-rw-r--r-- | arch/arm64/kernel/cpuinfo.c | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/entry.S | 37 | ||||
-rw-r--r-- | arch/arm64/kernel/hibernate.c | 118 | ||||
-rw-r--r-- | arch/arm64/kernel/mte.c | 336 | ||||
-rw-r--r-- | arch/arm64/kernel/process.c | 48 | ||||
-rw-r--r-- | arch/arm64/kernel/ptrace.c | 51 | ||||
-rw-r--r-- | arch/arm64/kernel/signal.c | 9 | ||||
-rw-r--r-- | arch/arm64/kernel/suspend.c | 4 | ||||
-rw-r--r-- | arch/arm64/kernel/syscall.c | 10 |
11 files changed, 639 insertions, 11 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88f160afc468..bbaf0bc4ad60 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o +obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 79207c323553..dcc165b3fc04 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -75,6 +75,7 @@ #include <asm/cpu_ops.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/sysreg.h> #include <asm/traps.h> @@ -227,6 +228,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), @@ -1688,6 +1691,22 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) } #endif /* CONFIG_ARM64_BTI */ +#ifdef CONFIG_ARM64_MTE +static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) +{ + static bool cleared_zero_page = false; + + /* + * Clear the tags in the zero page. This needs to be done via the + * linear map which has the Tagged attribute. + */ + if (!cleared_zero_page) { + cleared_zero_page = true; + mte_clear_page_tags(lm_alias(empty_zero_page)); + } +} +#endif /* CONFIG_ARM64_MTE */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -2104,6 +2123,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, }, #endif +#ifdef CONFIG_ARM64_MTE + { + .desc = "Memory Tagging Extension", + .capability = ARM64_MTE, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE, + .sign = FTR_UNSIGNED, + .cpu_enable = cpu_enable_mte, + }, +#endif /* CONFIG_ARM64_MTE */ {}, }; @@ -2220,6 +2252,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif +#ifdef CONFIG_ARM64_MTE + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), +#endif /* CONFIG_ARM64_MTE */ {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 25113245825c..6a7bb3729d60 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -93,6 +93,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_DGH] = "dgh", [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", + [KERNEL_HWCAP_MTE] = "mte", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index aeb337029d56..f30007dff35f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -147,6 +147,32 @@ alternative_cb_end .L__asm_ssbd_skip\@: .endm + /* Check for MTE asynchronous tag check faults */ + .macro check_mte_async_tcf, flgs, tmp +#ifdef CONFIG_ARM64_MTE +alternative_if_not ARM64_MTE + b 1f +alternative_else_nop_endif + mrs_s \tmp, SYS_TFSRE0_EL1 + tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f + /* Asynchronous TCF occurred for TTBR0 access, set the TI flag */ + orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT + str \flgs, [tsk, #TSK_TI_FLAGS] + msr_s SYS_TFSRE0_EL1, xzr +1: +#endif + .endm + + /* Clear the MTE asynchronous tag check faults */ + .macro clear_mte_async_tcf +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + dsb ish + msr_s SYS_TFSRE0_EL1, xzr +alternative_else_nop_endif +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -180,6 +206,8 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] disable_step_tsk x19, x20 + /* Check for asynchronous tag check faults in user space */ + check_mte_async_tcf x19, x22 apply_ssbd 1, x22, x23 ptrauth_keys_install_kernel tsk, x20, x22, x23 @@ -231,6 +259,13 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING str x20, [sp, #S_PMR_SAVE] alternative_else_nop_endif + /* Re-enable tag checking (TCO set on exception entry) */ +#ifdef CONFIG_ARM64_MTE +alternative_if ARM64_MTE + SET_PSTATE_TCO(0) +alternative_else_nop_endif +#endif + /* * Registers that may be useful after this macro is invoked: * @@ -740,6 +775,8 @@ SYM_CODE_START_LOCAL(ret_to_user) and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + /* Ignore asynchronous tag check faults in the uaccess routines */ + clear_mte_async_tcf enable_step_tsk x1, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 3a1662392c95..42003774d261 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -30,6 +30,7 @@ #include <asm/kexec.h> #include <asm/memory.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/pgalloc.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> @@ -284,6 +285,117 @@ static int create_safe_exec_page(void *src_start, size_t length, #define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) +#ifdef CONFIG_ARM64_MTE + +static DEFINE_XARRAY(mte_pages); + +static int save_tags(struct page *page, unsigned long pfn) +{ + void *tag_storage, *ret; + + tag_storage = mte_allocate_tag_storage(); + if (!tag_storage) + return -ENOMEM; + + mte_save_page_tags(page_address(page), tag_storage); + + ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL); + if (WARN(xa_is_err(ret), "Failed to store MTE tags")) { + mte_free_tag_storage(tag_storage); + return xa_err(ret); + } else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) { + mte_free_tag_storage(ret); + } + + return 0; +} + +static void swsusp_mte_free_storage(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + mte_free_tag_storage(tags); + } + xa_unlock(&mte_pages); + + xa_destroy(&mte_pages); +} + +static int swsusp_mte_save_tags(void) +{ + struct zone *zone; + unsigned long pfn, max_zone_pfn; + int ret = 0; + int n = 0; + + if (!system_supports_mte()) + return 0; + + for_each_populated_zone(zone) { + max_zone_pfn = zone_end_pfn(zone); + for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) { + struct page *page = pfn_to_online_page(pfn); + + if (!page) + continue; + + if (!test_bit(PG_mte_tagged, &page->flags)) + continue; + + ret = save_tags(page, pfn); + if (ret) { + swsusp_mte_free_storage(); + goto out; + } + + n++; + } + } + pr_info("Saved %d MTE pages\n", n); + +out: + return ret; +} + +static void swsusp_mte_restore_tags(void) +{ + XA_STATE(xa_state, &mte_pages, 0); + int n = 0; + void *tags; + + xa_lock(&mte_pages); + xas_for_each(&xa_state, tags, ULONG_MAX) { + unsigned long pfn = xa_state.xa_index; + struct page *page = pfn_to_online_page(pfn); + + mte_restore_page_tags(page_address(page), tags); + + mte_free_tag_storage(tags); + n++; + } + xa_unlock(&mte_pages); + + pr_info("Restored %d MTE pages\n", n); + + xa_destroy(&mte_pages); +} + +#else /* CONFIG_ARM64_MTE */ + +static int swsusp_mte_save_tags(void) +{ + return 0; +} + +static void swsusp_mte_restore_tags(void) +{ +} + +#endif /* CONFIG_ARM64_MTE */ + int swsusp_arch_suspend(void) { int ret = 0; @@ -301,6 +413,10 @@ int swsusp_arch_suspend(void) /* make the crash dump kernel image visible/saveable */ crash_prepare_suspend(); + ret = swsusp_mte_save_tags(); + if (ret) + return ret; + sleep_cpu = smp_processor_id(); ret = swsusp_save(); } else { @@ -314,6 +430,8 @@ int swsusp_arch_suspend(void) dcache_clean_range(__hyp_text_start, __hyp_text_end); } + swsusp_mte_restore_tags(); + /* make the crash dump kernel image protected again */ crash_post_resume(); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c new file mode 100644 index 000000000000..52a0638ed967 --- /dev/null +++ b/arch/arm64/kernel/mte.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 ARM Ltd. + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/prctl.h> +#include <linux/sched.h> +#include <linux/sched/mm.h> +#include <linux/string.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/thread_info.h> +#include <linux/uio.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> +#include <asm/ptrace.h> +#include <asm/sysreg.h> + +static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +{ + pte_t old_pte = READ_ONCE(*ptep); + + if (check_swap && is_swap_pte(old_pte)) { + swp_entry_t entry = pte_to_swp_entry(old_pte); + + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) + return; + } + + mte_clear_page_tags(page_address(page)); +} + +void mte_sync_tags(pte_t *ptep, pte_t pte) +{ + struct page *page = pte_page(pte); + long i, nr_pages = compound_nr(page); + bool check_swap = nr_pages == 1; + + /* if PG_mte_tagged is set, tags have already been initialised */ + for (i = 0; i < nr_pages; i++, page++) { + if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + mte_sync_page_tags(page, ptep, check_swap); + } +} + +int memcmp_pages(struct page *page1, struct page *page2) +{ + char *addr1, *addr2; + int ret; + + addr1 = page_address(page1); + addr2 = page_address(page2); + ret = memcmp(addr1, addr2, PAGE_SIZE); + + if (!system_supports_mte() || ret) + return ret; + + /* + * If the page content is identical but at least one of the pages is + * tagged, return non-zero to avoid KSM merging. If only one of the + * pages is tagged, set_pte_at() may zero or change the tags of the + * other page via mte_sync_tags(). + */ + if (test_bit(PG_mte_tagged, &page1->flags) || + test_bit(PG_mte_tagged, &page2->flags)) + return addr1 != addr2; + + return ret; +} + +static void update_sctlr_el1_tcf0(u64 tcf0) +{ + /* ISB required for the kernel uaccess routines */ + sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0); + isb(); +} + +static void set_sctlr_el1_tcf0(u64 tcf0) +{ + /* + * mte_thread_switch() checks current->thread.sctlr_tcf0 as an + * optimisation. Disable preemption so that it does not see + * the variable update before the SCTLR_EL1.TCF0 one. + */ + preempt_disable(); + current->thread.sctlr_tcf0 = tcf0; + update_sctlr_el1_tcf0(tcf0); + preempt_enable(); +} + +static void update_gcr_el1_excl(u64 incl) +{ + u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK; + + /* + * Note that 'incl' is an include mask (controlled by the user via + * prctl()) while GCR_EL1 accepts an exclude mask. + * No need for ISB since this only affects EL0 currently, implicit + * with ERET. + */ + sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl); +} + +static void set_gcr_el1_excl(u64 incl) +{ + current->thread.gcr_user_incl = incl; + update_gcr_el1_excl(incl); +} + +void flush_mte_state(void) +{ + if (!system_supports_mte()) + return; + + /* clear any pending asynchronous tag fault */ + dsb(ish); + write_sysreg_s(0, SYS_TFSRE0_EL1); + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + /* disable tag checking */ + set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE); + /* reset tag generation mask */ + set_gcr_el1_excl(0); +} + +void mte_thread_switch(struct task_struct *next) +{ + if (!system_supports_mte()) + return; + + /* avoid expensive SCTLR_EL1 accesses if no change */ + if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) + update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + update_gcr_el1_excl(next->thread.gcr_user_incl); +} + +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + update_gcr_el1_excl(current->thread.gcr_user_incl); +} + +long set_mte_ctrl(struct task_struct *task, unsigned long arg) +{ + u64 tcf0; + u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; + + if (!system_supports_mte()) + return 0; + + switch (arg & PR_MTE_TCF_MASK) { + case PR_MTE_TCF_NONE: + tcf0 = SCTLR_EL1_TCF0_NONE; + break; + case PR_MTE_TCF_SYNC: + tcf0 = SCTLR_EL1_TCF0_SYNC; + break; + case PR_MTE_TCF_ASYNC: + tcf0 = SCTLR_EL1_TCF0_ASYNC; + break; + default: + return -EINVAL; + } + + if (task != current) { + task->thread.sctlr_tcf0 = tcf0; + task->thread.gcr_user_incl = gcr_incl; + } else { + set_sctlr_el1_tcf0(tcf0); + set_gcr_el1_excl(gcr_incl); + } + + return 0; +} + +long get_mte_ctrl(struct task_struct *task) +{ + unsigned long ret; + + if (!system_supports_mte()) + return 0; + + ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT; + + switch (task->thread.sctlr_tcf0) { + case SCTLR_EL1_TCF0_NONE: + return PR_MTE_TCF_NONE; + case SCTLR_EL1_TCF0_SYNC: + ret |= PR_MTE_TCF_SYNC; + break; + case SCTLR_EL1_TCF0_ASYNC: + ret |= PR_MTE_TCF_ASYNC; + break; + } + + return ret; +} + +/* + * Access MTE tags in another process' address space as given in mm. Update + * the number of tags copied. Return 0 if any tags copied, error otherwise. + * Inspired by __access_remote_vm(). + */ +static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct vm_area_struct *vma; + void __user *buf = kiov->iov_base; + size_t len = kiov->iov_len; + int ret; + int write = gup_flags & FOLL_WRITE; + + if (!access_ok(buf, len)) + return -EFAULT; + + if (mmap_read_lock_killable(mm)) + return -EIO; + + while (len) { + unsigned long tags, offset; + void *maddr; + struct page *page = NULL; + + ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page, + &vma, NULL); + if (ret <= 0) + break; + + /* + * Only copy tags if the page has been mapped as PROT_MTE + * (PG_mte_tagged set). Otherwise the tags are not valid and + * not accessible to user. Moreover, an mprotect(PROT_MTE) + * would cause the existing tags to be cleared if the page + * was never mapped with PROT_MTE. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + ret = -EOPNOTSUPP; + put_page(page); + break; + } + + /* limit access to the end of the page */ + offset = offset_in_page(addr); + tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE); + + maddr = page_address(page); + if (write) { + tags = mte_copy_tags_from_user(maddr + offset, buf, tags); + set_page_dirty_lock(page); + } else { + tags = mte_copy_tags_to_user(buf, maddr + offset, tags); + } + put_page(page); + + /* error accessing the tracer's buffer */ + if (!tags) + break; + + len -= tags; + buf += tags; + addr += tags * MTE_GRANULE_SIZE; + } + mmap_read_unlock(mm); + + /* return an error if no tags copied */ + kiov->iov_len = buf - kiov->iov_base; + if (!kiov->iov_len) { + /* check for error accessing the tracee's address space */ + if (ret <= 0) + return -EIO; + else + return -EFAULT; + } + + return 0; +} + +/* + * Copy MTE tags in another process' address space at 'addr' to/from tracer's + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm(). + */ +static int access_remote_tags(struct task_struct *tsk, unsigned long addr, + struct iovec *kiov, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + if (!tsk->ptrace || (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return -EPERM; + } + + ret = __access_remote_tags(mm, addr, kiov, gup_flags); + mmput(mm); + + return ret; +} + +int mte_ptrace_copy_tags(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + int ret; + struct iovec kiov; + struct iovec __user *uiov = (void __user *)data; + unsigned int gup_flags = FOLL_FORCE; + + if (!system_supports_mte()) + return -EIO; + + if (get_user(kiov.iov_base, &uiov->iov_base) || + get_user(kiov.iov_len, &uiov->iov_len)) + return -EFAULT; + + if (request == PTRACE_POKEMTETAGS) + gup_flags |= FOLL_WRITE; + + /* align addr to the MTE tag granule */ + addr &= MTE_GRANULE_MASK; + + ret = access_remote_tags(child, addr, &kiov, gup_flags); + if (!ret) + ret = put_user(kiov.iov_len, &uiov->iov_len); + + return ret; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 085d8ca39e47..4784011cecac 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -53,6 +53,7 @@ #include <asm/exec.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> +#include <asm/mte.h> #include <asm/processor.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> @@ -240,7 +241,7 @@ static void print_pstate(struct pt_regs *regs) const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> PSR_BTYPE_SHIFT]; - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -252,6 +253,7 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', pstate & PSR_UAO_BIT ? '+' : '-', + pstate & PSR_TCO_BIT ? '+' : '-', btype_str); } } @@ -337,6 +339,7 @@ void flush_thread(void) tls_thread_flush(); flush_ptrace_hw_breakpoint(current); flush_tagged_addr_state(); + flush_mte_state(); } void release_thread(struct task_struct *dead_task) @@ -369,6 +372,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.sve_state = NULL; clear_tsk_thread_flag(dst, TIF_SVE); + /* clear any pending asynchronous tag fault raised by the parent */ + clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT); + return 0; } @@ -561,6 +567,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, */ dsb(ish); + /* + * MTE thread switching must happen after the DSB above to ensure that + * any asynchronous tag check faults have been logged in the TFSR*_EL1 + * registers. + */ + mte_thread_switch(next); + /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -623,11 +636,18 @@ void arch_setup_new_exec(void) */ static unsigned int tagged_addr_disabled; -long set_tagged_addr_ctrl(unsigned long arg) +long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) { - if (is_compat_task()) + unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (arg & ~PR_TAGGED_ADDR_ENABLE) + + if (system_supports_mte()) + valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + + if (arg & ~valid_mask) return -EINVAL; /* @@ -637,20 +657,28 @@ long set_tagged_addr_ctrl(unsigned long arg) if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) return -EINVAL; - update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + if (set_mte_ctrl(task, arg) != 0) + return -EINVAL; + + update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); return 0; } -long get_tagged_addr_ctrl(void) +long get_tagged_addr_ctrl(struct task_struct *task) { - if (is_compat_task()) + long ret = 0; + struct thread_info *ti = task_thread_info(task); + + if (is_compat_thread(ti)) return -EINVAL; - if (test_thread_flag(TIF_TAGGED_ADDR)) - return PR_TAGGED_ADDR_ENABLE; + if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR)) + ret = PR_TAGGED_ADDR_ENABLE; - return 0; + ret |= get_mte_ctrl(task); + + return ret; } /* diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d8ebfd813e28..f49b349e16a3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -34,6 +34,7 @@ #include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/fpsimd.h> +#include <asm/mte.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> #include <asm/syscall.h> @@ -1032,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target, #endif /* CONFIG_CHECKPOINT_RESTORE */ #endif /* CONFIG_ARM64_PTR_AUTH */ +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +static int tagged_addr_ctrl_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + long ctrl = get_tagged_addr_ctrl(target); + + if (IS_ERR_VALUE(ctrl)) + return ctrl; + + return membuf_write(&to, &ctrl, sizeof(ctrl)); +} + +static int tagged_addr_ctrl_set(struct task_struct *target, const struct + user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, const + void __user *ubuf) +{ + int ret; + long ctrl; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); + if (ret) + return ret; + + return set_tagged_addr_ctrl(target, ctrl); +} +#endif + enum aarch64_regset { REGSET_GPR, REGSET_FPR, @@ -1051,6 +1081,9 @@ enum aarch64_regset { REGSET_PACG_KEYS, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + REGSET_TAGGED_ADDR_CTRL, +#endif }; static const struct user_regset aarch64_regsets[] = { @@ -1148,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = { }, #endif #endif +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI + [REGSET_TAGGED_ADDR_CTRL] = { + .core_note_type = NT_ARM_TAGGED_ADDR_CTRL, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = tagged_addr_ctrl_get, + .set = tagged_addr_ctrl_set, + }, +#endif }; static const struct user_regset_view user_aarch64_view = { @@ -1691,6 +1734,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + switch (request) { + case PTRACE_PEEKMTETAGS: + case PTRACE_POKEMTETAGS: + return mte_ptrace_copy_tags(child, request, addr, data); + } + return ptrace_request(child, request, addr, data); } @@ -1793,7 +1842,7 @@ void syscall_trace_exit(struct pt_regs *regs) * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \ GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f3af68dc1cf8..bdcaaf091e1e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -749,6 +749,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->pstate |= PSR_BTYPE_C; } + /* TCO (Tag Check Override) always cleared for signal handlers */ + regs->pstate &= ~PSR_TCO_BIT; + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else @@ -933,6 +936,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, if (thread_flags & _TIF_UPROBE) uprobe_notify_resume(regs); + if (thread_flags & _TIF_MTE_ASYNC_FAULT) { + clear_thread_flag(TIF_MTE_ASYNC_FAULT); + send_sig_fault(SIGSEGV, SEGV_MTEAERR, + (void __user *)NULL, current); + } + if (thread_flags & _TIF_SIGPENDING) do_signal(regs); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 584c14ce3c86..96cd347c7a46 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -10,6 +10,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exec.h> +#include <asm/mte.h> #include <asm/memory.h> #include <asm/mmu_context.h> #include <asm/smp_plat.h> @@ -73,6 +74,9 @@ void notrace __cpu_suspend_exit(void) * disabled it, make sure their wishes are obeyed. */ spectre_v4_enable_mitigation(NULL); + + /* Restore additional MTE-specific configuration */ + mte_suspend_exit(); } /* diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 5f0c04863d2c..e4c0dadf0d92 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -123,6 +123,16 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, local_daif_restore(DAIF_PROCCTX); user_exit(); + if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { + /* + * Process the asynchronous tag check fault before the actual + * syscall. do_notify_resume() will send a signal to userspace + * before the syscall is restarted. + */ + regs->regs[0] = -ERESTARTNOINTR; + return; + } + if (has_syscall_work(flags)) { /* * The de-facto standard way to skip a system call using ptrace |